{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.53536, "global_step": 1673, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 3.125e-05, "loss": 1.5069, "theoretical_loss": 3.4847152446610696, "tokens_seen": 1662124032 }, { "epoch": 0.0, "learning_rate": 6.25e-05, "loss": 1.4985, "theoretical_loss": 3.4846202463681912, "tokens_seen": 1662648320 }, { "epoch": 0.0, "learning_rate": 9.375e-05, "loss": 1.4498, "theoretical_loss": 3.4845252864113894, "tokens_seen": 1663172608 }, { "epoch": 0.0, "objective/train/advantage_avg": 0.4852290153503418, "objective/train/docs_used": 941634, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.5609934329986572, "objective/train/original_loss": 2.5609934329986572, "objective/train/theoretical_loss": 3.484513419110922, "objective/train/tokens_used": 22097376, "objective/train/value_avg": -0.494873046875, "objective/train/value_loss": 0.24035517871379852, "objective/train/value_max": -0.490478515625, "objective/train/value_min": -0.49951171875, "objective/train/value_reward_corr": 0.05187214352461965, "objective/train/value_std": 0.0013017654418945312, "objective/train/weight_avg": 1.0497443675994873, "objective/train/weighted_lm_loss": 2.687887191772461, "objective/train/weights_max": 1.0511486530303955, "objective/train/weights_min": 0.9508827924728394, "theoretical_loss": 3.484513419110922, "tokens_seen": 1663238144 }, { "epoch": 0.0, "learning_rate": 0.000125, "loss": 1.4816, "theoretical_loss": 3.4844303647631154, "tokens_seen": 1663696896 }, { "epoch": 0.0, "learning_rate": 0.00015625, "loss": 1.4565, "theoretical_loss": 3.4843354813958505, "tokens_seen": 1664221184 }, { "epoch": 0.0, "learning_rate": 0.0001875, "loss": 1.5038, "theoretical_loss": 3.4842406362821032, "tokens_seen": 1664745472 }, { "epoch": 0.0, "objective/train/advantage_avg": 0.4578465521335602, "objective/train/docs_used": 943199, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.0893185138702393, "objective/train/original_loss": 2.08931827545166, "objective/train/theoretical_loss": 3.48421693097756, "objective/train/tokens_used": 23735776, "objective/train/value_avg": -0.473388671875, "objective/train/value_loss": 0.2129276990890503, "objective/train/value_max": -0.45458984375, "objective/train/value_min": -0.49560546875, "objective/train/value_reward_corr": 0.04115803492314714, "objective/train/value_std": 0.006237030029296875, "objective/train/weight_avg": 1.0468661785125732, "objective/train/weighted_lm_loss": 2.186415672302246, "objective/train/weights_max": 1.0507301092147827, "objective/train/weights_min": 1.0036543607711792, "theoretical_loss": 3.48421693097756, "tokens_seen": 1664876544 }, { "epoch": 0.0, "learning_rate": 0.00021875, "loss": 1.4951, "theoretical_loss": 3.484145829394412, "tokens_seen": 1665269760 }, { "epoch": 0.0, "learning_rate": 0.00025, "loss": 1.4772, "theoretical_loss": 3.484051060705342, "tokens_seen": 1665794048 }, { "epoch": 0.0, "learning_rate": 0.00028125000000000003, "loss": 1.4171, "theoretical_loss": 3.483956330187487, "tokens_seen": 1666318336 }, { "epoch": 0.0, "objective/train/advantage_avg": 0.37413716316223145, "objective/train/docs_used": 943856, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.685318946838379, "objective/train/original_loss": 2.6853184700012207, "objective/train/theoretical_loss": 3.4839208160787187, "objective/train/tokens_used": 25374176, "objective/train/value_avg": -0.430419921875, "objective/train/value_loss": 0.15903618931770325, "objective/train/value_max": -0.37841796875, "objective/train/value_min": -0.481689453125, "objective/train/value_reward_corr": 0.0857645922384168, "objective/train/value_std": 0.01611328125, "objective/train/weight_avg": 1.038219928741455, "objective/train/weighted_lm_loss": 2.78816819190979, "objective/train/weights_max": 1.0492851734161377, "objective/train/weights_min": 0.9517058730125427, "theoretical_loss": 3.4839208160787187, "tokens_seen": 1666514944 }, { "epoch": 0.0, "learning_rate": 0.0003125, "loss": 1.4217, "theoretical_loss": 3.4838616378134697, "tokens_seen": 1666842624 }, { "epoch": 0.0, "learning_rate": 0.00034375, "loss": 1.4478, "theoretical_loss": 3.483766983555941, "tokens_seen": 1667366912 }, { "epoch": 0.0, "learning_rate": 0.000375, "loss": 1.4345, "theoretical_loss": 3.4836723673875793, "tokens_seen": 1667891200 }, { "epoch": 0.0, "objective/train/advantage_avg": 0.34323742985725403, "objective/train/docs_used": 945126, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.5524609088897705, "objective/train/original_loss": 2.5524606704711914, "objective/train/theoretical_loss": 3.4836250735783065, "objective/train/tokens_used": 27012576, "objective/train/value_avg": -0.359375, "objective/train/value_loss": 0.12396484613418579, "objective/train/value_max": -0.254150390625, "objective/train/value_min": -0.476806640625, "objective/train/value_reward_corr": -0.09255629489755471, "objective/train/value_std": 0.03692626953125, "objective/train/weight_avg": 1.0349509716033936, "objective/train/weighted_lm_loss": 2.647698163986206, "objective/train/weights_max": 1.0487284660339355, "objective/train/weights_min": 0.9461237192153931, "theoretical_loss": 3.4836250735783065, "tokens_seen": 1668153344 }, { "epoch": 0.0, "learning_rate": 0.00040625000000000004, "loss": 1.4027, "theoretical_loss": 3.4835777892810924, "tokens_seen": 1668415488 }, { "epoch": 0.0, "learning_rate": 0.0004375, "loss": 1.3985, "theoretical_loss": 3.4834832492092147, "tokens_seen": 1668939776 }, { "epoch": 0.0, "learning_rate": 0.00046875, "loss": 1.4011, "theoretical_loss": 3.48338874714471, "tokens_seen": 1669464064 }, { "epoch": 0.0, "objective/train/advantage_avg": 0.23369024693965912, "objective/train/docs_used": 945699, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.318622350692749, "objective/train/original_loss": 2.318622350692749, "objective/train/theoretical_loss": 3.483329702642922, "objective/train/tokens_used": 28650976, "objective/train/value_avg": -0.239990234375, "objective/train/value_loss": 0.06014876812696457, "objective/train/value_max": -0.07977294921875, "objective/train/value_min": -0.442626953125, "objective/train/value_reward_corr": -0.15528615660735792, "objective/train/value_std": 0.069091796875, "objective/train/weight_avg": 1.0236725807189941, "objective/train/weighted_lm_loss": 2.3841559886932373, "objective/train/weights_max": 1.0451645851135254, "objective/train/weights_min": 0.9836625456809998, "theoretical_loss": 3.483329702642922, "tokens_seen": 1669791744 }, { "epoch": 0.01, "learning_rate": 0.0005, "loss": 1.4122, "theoretical_loss": 3.483294283060369, "tokens_seen": 1669988352 }, { "epoch": 0.01, "learning_rate": 0.00053125, "loss": 1.3632, "theoretical_loss": 3.4831998569290104, "tokens_seen": 1670512640 }, { "epoch": 0.01, "learning_rate": 0.0005625000000000001, "loss": 1.3608, "theoretical_loss": 3.4831054687234833, "tokens_seen": 1671036928 }, { "epoch": 0.01, "objective/train/advantage_avg": 0.15091031789779663, "objective/train/docs_used": 946982, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.9974899291992188, "objective/train/original_loss": 2.997490167617798, "objective/train/theoretical_loss": 3.483034702441845, "objective/train/tokens_used": 30289376, "objective/train/value_avg": -0.185302734375, "objective/train/value_loss": 0.03601464629173279, "objective/train/value_max": -0.0199432373046875, "objective/train/value_min": -0.39599609375, "objective/train/value_reward_corr": 0.1667326559350718, "objective/train/value_std": 0.06829833984375, "objective/train/weight_avg": 1.0152721405029297, "objective/train/weighted_lm_loss": 3.051727056503296, "objective/train/weights_max": 1.0403271913528442, "objective/train/weights_min": 0.928127646446228, "theoretical_loss": 3.483034702441845, "tokens_seen": 1671430144 }, { "epoch": 0.01, "learning_rate": 0.00059375, "loss": 1.3869, "theoretical_loss": 3.4830111184166612, "tokens_seen": 1671561216 }, { "epoch": 0.01, "learning_rate": 0.000625, "loss": 1.3178, "theoretical_loss": 3.482916805981448, "tokens_seen": 1672085504 }, { "epoch": 0.01, "learning_rate": 0.00065625, "loss": 1.3011, "theoretical_loss": 3.4828225313907746, "tokens_seen": 1672609792 }, { "epoch": 0.01, "objective/train/advantage_avg": 0.09622465074062347, "objective/train/docs_used": 947668, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 3.002412796020508, "objective/train/original_loss": 3.002413034439087, "objective/train/theoretical_loss": 3.4827400721470254, "objective/train/tokens_used": 31927776, "objective/train/value_avg": -0.1270751953125, "objective/train/value_loss": 0.022656194865703583, "objective/train/value_max": -0.004810333251953125, "objective/train/value_min": -0.34765625, "objective/train/value_reward_corr": 0.20687232261522706, "objective/train/value_std": 0.06207275390625, "objective/train/weight_avg": 1.0097355842590332, "objective/train/weighted_lm_loss": 3.040234088897705, "objective/train/weights_max": 1.035300374031067, "objective/train/weights_min": 0.9148141741752625, "theoretical_loss": 3.4827400721470254, "tokens_seen": 1673068544 }, { "epoch": 0.01, "learning_rate": 0.0006875, "loss": 1.3336, "theoretical_loss": 3.4827282946175995, "tokens_seen": 1673134080 }, { "epoch": 0.01, "learning_rate": 0.00071875, "loss": 1.339, "theoretical_loss": 3.4826340956349093, "tokens_seen": 1673658368 }, { "epoch": 0.01, "learning_rate": 0.00075, "loss": 1.329, "theoretical_loss": 3.482539934415719, "tokens_seen": 1674182656 }, { "epoch": 0.01, "objective/train/advantage_avg": 0.06772001087665558, "objective/train/docs_used": 948408, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.540395975112915, "objective/train/original_loss": 2.540395975112915, "objective/train/theoretical_loss": 3.4824458109330694, "objective/train/tokens_used": 33566176, "objective/train/value_avg": -0.07220458984375, "objective/train/value_loss": 0.007030518725514412, "objective/train/value_max": -0.0026836395263671875, "objective/train/value_min": -0.291015625, "objective/train/value_reward_corr": 0.008605605365664556, "objective/train/value_std": 0.043060302734375, "objective/train/weight_avg": 1.0068073272705078, "objective/train/weighted_lm_loss": 2.563676595687866, "objective/train/weights_max": 1.0294532775878906, "objective/train/weights_min": 0.9425356984138489, "theoretical_loss": 3.4824458109330694, "tokens_seen": 1674706944 }, { "epoch": 0.01, "learning_rate": 0.00078125, "loss": 1.324, "theoretical_loss": 3.4824458109330694, "tokens_seen": 1674706944 }, { "epoch": 0.01, "learning_rate": 0.0008125000000000001, "loss": 1.3396, "theoretical_loss": 3.482351725160031, "tokens_seen": 1675231232 }, { "epoch": 0.01, "learning_rate": 0.00084375, "loss": 1.2938, "theoretical_loss": 3.482257677069702, "tokens_seen": 1675755520 }, { "epoch": 0.01, "learning_rate": 0.000875, "loss": 1.3135, "theoretical_loss": 3.4821636666352056, "tokens_seen": 1676279808 }, { "epoch": 0.01, "objective/train/advantage_avg": 0.018593251705169678, "objective/train/docs_used": 949481, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.314517021179199, "objective/train/original_loss": 2.314517021179199, "objective/train/theoretical_loss": 3.4821519179772316, "objective/train/tokens_used": 35204576, "objective/train/value_avg": -0.048828125, "objective/train/value_loss": 0.013985752128064632, "objective/train/value_max": -0.001964569091796875, "objective/train/value_min": -0.249267578125, "objective/train/value_reward_corr": -0.06280137961610016, "objective/train/value_std": 0.0350341796875, "objective/train/weight_avg": 1.0019280910491943, "objective/train/weighted_lm_loss": 2.3259332180023193, "objective/train/weights_max": 1.0251080989837646, "objective/train/weights_min": 0.9080712795257568, "theoretical_loss": 3.4821519179772316, "tokens_seen": 1676345344 }, { "epoch": 0.01, "learning_rate": 0.00090625, "loss": 1.3201, "theoretical_loss": 3.482069693829695, "tokens_seen": 1676804096 }, { "epoch": 0.01, "learning_rate": 0.0009375, "loss": 1.3311, "theoretical_loss": 3.4819757586263504, "tokens_seen": 1677328384 }, { "epoch": 0.01, "learning_rate": 0.00096875, "loss": 1.3359, "theoretical_loss": 3.4818818609983797, "tokens_seen": 1677852672 }, { "epoch": 0.01, "objective/train/advantage_avg": 0.02113819681107998, "objective/train/docs_used": 950694, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.4766831398010254, "objective/train/original_loss": 2.4766831398010254, "objective/train/theoretical_loss": 3.4818583924594018, "objective/train/tokens_used": 36842976, "objective/train/value_avg": -0.03424072265625, "objective/train/value_loss": 0.00494113564491272, "objective/train/value_max": -0.0004973411560058594, "objective/train/value_min": -0.226318359375, "objective/train/value_reward_corr": 0.029900853727051296, "objective/train/value_std": 0.0267333984375, "objective/train/weight_avg": 1.0021382570266724, "objective/train/weighted_lm_loss": 2.48506498336792, "objective/train/weights_max": 1.0228263139724731, "objective/train/weights_min": 0.9085569977760315, "theoretical_loss": 3.4818583924594018, "tokens_seen": 1677983744 }, { "epoch": 0.01, "learning_rate": 0.001, "loss": 1.3178, "theoretical_loss": 3.4817880009190167, "tokens_seen": 1678376960 }, { "epoch": 0.01, "learning_rate": 0.0009996766892984158, "loss": 1.3257, "theoretical_loss": 3.4816941783615256, "tokens_seen": 1678901248 }, { "epoch": 0.01, "learning_rate": 0.0009993533785968315, "loss": 1.3549, "theoretical_loss": 3.4816003932991944, "tokens_seen": 1679425536 }, { "epoch": 0.01, "objective/train/advantage_avg": 0.02700362727046013, "objective/train/docs_used": 951339, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.574002981185913, "objective/train/original_loss": 2.574002981185913, "objective/train/theoretical_loss": 3.481565233562095, "objective/train/tokens_used": 38481376, "objective/train/value_avg": -0.03619384765625, "objective/train/value_loss": 0.00204839906655252, "objective/train/value_max": -0.00021147727966308594, "objective/train/value_min": -0.21435546875, "objective/train/value_reward_corr": 0.1106871028930653, "objective/train/value_std": 0.0265960693359375, "objective/train/weight_avg": 1.0027105808258057, "objective/train/weighted_lm_loss": 2.5846498012542725, "objective/train/weights_max": 1.0215901136398315, "objective/train/weights_min": 0.982610285282135, "theoretical_loss": 3.481565233562095, "tokens_seen": 1679622144 }, { "epoch": 0.01, "learning_rate": 0.0009990300678952473, "loss": 1.3168, "theoretical_loss": 3.481506645705341, "tokens_seen": 1679949824 }, { "epoch": 0.01, "learning_rate": 0.000998706757193663, "loss": 1.3337, "theoretical_loss": 3.481412935553311, "tokens_seen": 1680474112 }, { "epoch": 0.01, "learning_rate": 0.0009983834464920788, "loss": 1.3602, "theoretical_loss": 3.481319262816474, "tokens_seen": 1680998400 }, { "epoch": 0.01, "objective/train/advantage_avg": 0.015650054439902306, "objective/train/docs_used": 952727, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.644263505935669, "objective/train/original_loss": 2.644263505935669, "objective/train/theoretical_loss": 3.4812724404704394, "objective/train/tokens_used": 40119776, "objective/train/value_avg": -0.034698486328125, "objective/train/value_loss": 0.005067009478807449, "objective/train/value_max": -0.000743865966796875, "objective/train/value_min": -0.2109375, "objective/train/value_reward_corr": 0.2143254247223497, "objective/train/value_std": 0.0258026123046875, "objective/train/weight_avg": 1.0015900135040283, "objective/train/weighted_lm_loss": 2.6496243476867676, "objective/train/weights_max": 1.0212239027023315, "objective/train/weights_min": 0.9097957015037537, "theoretical_loss": 3.4812724404704394, "tokens_seen": 1681260544 }, { "epoch": 0.01, "learning_rate": 0.0009980601357904948, "loss": 1.3416, "theoretical_loss": 3.48122562746823, "tokens_seen": 1681522688 }, { "epoch": 0.01, "learning_rate": 0.0009977368250889105, "loss": 1.3594, "theoretical_loss": 3.481132029482005, "tokens_seen": 1682046976 }, { "epoch": 0.01, "learning_rate": 0.0009974135143873263, "loss": 1.2852, "theoretical_loss": 3.481038468831253, "tokens_seen": 1682571264 }, { "epoch": 0.01, "objective/train/advantage_avg": 0.026778804138302803, "objective/train/docs_used": 953335, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.690138578414917, "objective/train/original_loss": 2.690138816833496, "objective/train/theoretical_loss": 3.480980012372167, "objective/train/tokens_used": 41758176, "objective/train/value_avg": -0.031097412109375, "objective/train/value_loss": 0.00174819550011307, "objective/train/value_max": -0.00046181678771972656, "objective/train/value_min": -0.180419921875, "objective/train/value_reward_corr": 0.09580148865151333, "objective/train/value_std": 0.0243072509765625, "objective/train/weight_avg": 1.002686619758606, "objective/train/weighted_lm_loss": 2.7005701065063477, "objective/train/weights_max": 1.0181187391281128, "objective/train/weights_min": 0.9496287703514099, "theoretical_loss": 3.480980012372167, "tokens_seen": 1682898944 }, { "epoch": 0.01, "learning_rate": 0.000997090203685742, "loss": 1.3597, "theoretical_loss": 3.480944945489453, "tokens_seen": 1683095552 }, { "epoch": 0.01, "learning_rate": 0.0009967668929841578, "loss": 1.3545, "theoretical_loss": 3.4808514594301134, "tokens_seen": 1683619840 }, { "epoch": 0.01, "learning_rate": 0.0009964435822825736, "loss": 1.3393, "theoretical_loss": 3.4807580106267695, "tokens_seen": 1684144128 }, { "epoch": 0.01, "objective/train/advantage_avg": 0.020779820159077644, "objective/train/docs_used": 954676, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.5975658893585205, "objective/train/original_loss": 2.5975656509399414, "objective/train/theoretical_loss": 3.480687948457602, "objective/train/tokens_used": 43396576, "objective/train/value_avg": -0.0291900634765625, "objective/train/value_loss": 0.0018343148985877633, "objective/train/value_max": -0.0002199411392211914, "objective/train/value_min": -0.230224609375, "objective/train/value_reward_corr": -0.1541098351897453, "objective/train/value_std": 0.025848388671875, "objective/train/weight_avg": 1.002087116241455, "objective/train/weighted_lm_loss": 2.607747793197632, "objective/train/weights_max": 1.023224115371704, "objective/train/weights_min": 0.9699746370315552, "theoretical_loss": 3.480687948457602, "tokens_seen": 1684537344 }, { "epoch": 0.01, "learning_rate": 0.0009961202715809893, "loss": 1.2964, "theoretical_loss": 3.4806645990529814, "tokens_seen": 1684668416 }, { "epoch": 0.01, "learning_rate": 0.000995796960879405, "loss": 1.3396, "theoretical_loss": 3.480571224682339, "tokens_seen": 1685192704 }, { "epoch": 0.01, "learning_rate": 0.0009954736501778208, "loss": 1.3164, "theoretical_loss": 3.4804778874884565, "tokens_seen": 1685716992 }, { "epoch": 0.01, "objective/train/advantage_avg": 0.014628480188548565, "objective/train/docs_used": 955358, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.939648389816284, "objective/train/original_loss": 2.939648389816284, "objective/train/theoretical_loss": 3.48039624791965, "objective/train/tokens_used": 45034976, "objective/train/value_avg": -0.040130615234375, "objective/train/value_loss": 0.013309458270668983, "objective/train/value_max": -0.0004513263702392578, "objective/train/value_min": -0.262939453125, "objective/train/value_reward_corr": 0.20001015791586918, "objective/train/value_std": 0.0322265625, "objective/train/weight_avg": 1.001528024673462, "objective/train/weighted_lm_loss": 2.9461116790771484, "objective/train/weights_max": 1.0264393091201782, "objective/train/weights_min": 0.9067297577857971, "theoretical_loss": 3.48039624791965, "tokens_seen": 1686175744 }, { "epoch": 0.02, "learning_rate": 0.0009951503394762366, "loss": 1.3383, "theoretical_loss": 3.480384587444977, "tokens_seen": 1686241280 }, { "epoch": 0.02, "learning_rate": 0.0009948270287746526, "loss": 1.3674, "theoretical_loss": 3.4802913245255707, "tokens_seen": 1686765568 }, { "epoch": 0.02, "learning_rate": 0.0009945037180730683, "loss": 1.3146, "theoretical_loss": 3.4801980987039327, "tokens_seen": 1687289856 }, { "epoch": 0.02, "objective/train/advantage_avg": 0.01846342161297798, "objective/train/docs_used": 956671, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.478999137878418, "objective/train/original_loss": 2.478998899459839, "objective/train/theoretical_loss": 3.480104909953786, "objective/train/tokens_used": 46673376, "objective/train/value_avg": -0.0423583984375, "objective/train/value_loss": 0.006455108057707548, "objective/train/value_max": -0.0002269744873046875, "objective/train/value_min": -0.52880859375, "objective/train/value_reward_corr": 0.5217319085258776, "objective/train/value_std": 0.060028076171875, "objective/train/weight_avg": 1.001878261566162, "objective/train/weighted_lm_loss": 2.485156297683716, "objective/train/weights_max": 1.0441792011260986, "objective/train/weights_min": 0.9251841902732849, "theoretical_loss": 3.480104909953786, "tokens_seen": 1687814144 }, { "epoch": 0.02, "learning_rate": 0.0009941804073714839, "loss": 1.3346, "theoretical_loss": 3.480104909953786, "tokens_seen": 1687814144 }, { "epoch": 0.02, "learning_rate": 0.0009938570966698998, "loss": 1.3241, "theoretical_loss": 3.4800117582488808, "tokens_seen": 1688338432 }, { "epoch": 0.02, "learning_rate": 0.0009935337859683156, "loss": 1.3132, "theoretical_loss": 3.4799186435629927, "tokens_seen": 1688862720 }, { "epoch": 0.02, "learning_rate": 0.0009932104752667314, "loss": 1.3413, "theoretical_loss": 3.479825565869926, "tokens_seen": 1689387008 }, { "epoch": 0.02, "objective/train/advantage_avg": 0.016234280541539192, "objective/train/docs_used": 957370, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.923020601272583, "objective/train/original_loss": 2.923020601272583, "objective/train/theoretical_loss": 3.4798139337580465, "objective/train/tokens_used": 48311776, "objective/train/value_avg": -0.029449462890625, "objective/train/value_loss": 0.004371629096567631, "objective/train/value_max": -0.0005054473876953125, "objective/train/value_min": -0.2047119140625, "objective/train/value_reward_corr": 0.028923472785162597, "objective/train/value_std": 0.02117919921875, "objective/train/weight_avg": 1.0016449689865112, "objective/train/weighted_lm_loss": 2.930434226989746, "objective/train/weights_max": 1.020424246788025, "objective/train/weights_min": 0.9172429442405701, "theoretical_loss": 3.4798139337580465, "tokens_seen": 1689452544 }, { "epoch": 0.02, "learning_rate": 0.0009928871645651471, "loss": 1.3667, "theoretical_loss": 3.4797325251435094, "tokens_seen": 1689911296 }, { "epoch": 0.02, "learning_rate": 0.0009925638538635629, "loss": 1.3065, "theoretical_loss": 3.4796395213576004, "tokens_seen": 1690435584 }, { "epoch": 0.02, "learning_rate": 0.0009922405431619786, "loss": 1.2826, "theoretical_loss": 3.479546554486081, "tokens_seen": 1690959872 }, { "epoch": 0.02, "objective/train/advantage_avg": 0.02613617666065693, "objective/train/docs_used": 958661, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.751319646835327, "objective/train/original_loss": 2.751319646835327, "objective/train/theoretical_loss": 3.479523318533017, "objective/train/tokens_used": 49950176, "objective/train/value_avg": -0.032684326171875, "objective/train/value_loss": 0.0028091920539736748, "objective/train/value_max": -0.0003275871276855469, "objective/train/value_min": -0.22265625, "objective/train/value_reward_corr": 0.06452474363255457, "objective/train/value_std": 0.0235748291015625, "objective/train/weight_avg": 1.0026276111602783, "objective/train/weighted_lm_loss": 2.760897159576416, "objective/train/weights_max": 1.022439956665039, "objective/train/weights_min": 0.920654296875, "theoretical_loss": 3.479523318533017, "tokens_seen": 1691090944 }, { "epoch": 0.02, "learning_rate": 0.0009919172324603944, "loss": 1.3482, "theoretical_loss": 3.479453624502862, "tokens_seen": 1691484160 }, { "epoch": 0.02, "learning_rate": 0.0009915939217588102, "loss": 1.3601, "theoretical_loss": 3.4793607313818784, "tokens_seen": 1692008448 }, { "epoch": 0.02, "learning_rate": 0.0009912706110572261, "loss": 1.3852, "theoretical_loss": 3.4792678750970936, "tokens_seen": 1692532736 }, { "epoch": 0.02, "objective/train/advantage_avg": 0.016751615330576897, "objective/train/docs_used": 959159, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.7094051837921143, "objective/train/original_loss": 2.709404945373535, "objective/train/theoretical_loss": 3.4792330634818214, "objective/train/tokens_used": 51588576, "objective/train/value_avg": -0.029052734375, "objective/train/value_loss": 0.005097657907754183, "objective/train/value_max": -0.0006823539733886719, "objective/train/value_min": -0.335205078125, "objective/train/value_reward_corr": 0.06617998087255883, "objective/train/value_std": 0.02276611328125, "objective/train/weight_avg": 1.0017001628875732, "objective/train/weighted_lm_loss": 2.7164652347564697, "objective/train/weights_max": 1.0338839292526245, "objective/train/weights_min": 0.9133527874946594, "theoretical_loss": 3.4792330634818214, "tokens_seen": 1692729344 }, { "epoch": 0.02, "learning_rate": 0.0009909473003556419, "loss": 1.3228, "theoretical_loss": 3.4791750556224956, "tokens_seen": 1693057024 }, { "epoch": 0.02, "learning_rate": 0.0009906239896540574, "loss": 1.3428, "theoretical_loss": 3.479082272932101, "tokens_seen": 1693581312 }, { "epoch": 0.02, "learning_rate": 0.0009903006789524734, "loss": 1.3186, "theoretical_loss": 3.4789895269999507, "tokens_seen": 1694105600 }, { "epoch": 0.02, "objective/train/advantage_avg": 0.0161640252918005, "objective/train/docs_used": 960536, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.6739766597747803, "objective/train/original_loss": 2.6739766597747803, "objective/train/theoretical_loss": 3.4789431678101126, "objective/train/tokens_used": 53226976, "objective/train/value_avg": -0.0330810546875, "objective/train/value_loss": 0.0050297933630645275, "objective/train/value_max": -0.0001838207244873047, "objective/train/value_min": -0.2237548828125, "objective/train/value_reward_corr": 0.1976027814865125, "objective/train/value_std": 0.0262603759765625, "objective/train/weight_avg": 1.0016411542892456, "objective/train/weighted_lm_loss": 2.6808063983917236, "objective/train/weights_max": 1.0197616815567017, "objective/train/weights_min": 0.9131374359130859, "theoretical_loss": 3.4789431678101126, "tokens_seen": 1694367744 }, { "epoch": 0.02, "learning_rate": 0.0009899773682508892, "loss": 1.3481, "theoretical_loss": 3.4788968178001136, "tokens_seen": 1694629888 }, { "epoch": 0.02, "learning_rate": 0.000989654057549305, "loss": 1.3207, "theoretical_loss": 3.4788041453066834, "tokens_seen": 1695154176 }, { "epoch": 0.02, "learning_rate": 0.0009893307468477207, "loss": 1.3178, "theoretical_loss": 3.478711509493782, "tokens_seen": 1695678464 }, { "epoch": 0.02, "objective/train/advantage_avg": 0.01537461206316948, "objective/train/docs_used": 961177, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.9090325832366943, "objective/train/original_loss": 2.9090328216552734, "objective/train/theoretical_loss": 3.4786536307260603, "objective/train/tokens_used": 54865376, "objective/train/value_avg": -0.0305328369140625, "objective/train/value_loss": 0.0045534479431807995, "objective/train/value_max": -0.0010900497436523438, "objective/train/value_min": -0.2379150390625, "objective/train/value_reward_corr": 0.15167159510521547, "objective/train/value_std": 0.022796630859375, "objective/train/weight_avg": 1.0015599727630615, "objective/train/weighted_lm_loss": 2.915318250656128, "objective/train/weights_max": 1.022596836090088, "objective/train/weights_min": 0.9115495085716248, "theoretical_loss": 3.4786536307260603, "tokens_seen": 1696006144 }, { "epoch": 0.02, "learning_rate": 0.0009890074361461364, "loss": 1.3561, "theoretical_loss": 3.4786189103355554, "tokens_seen": 1696202752 }, { "epoch": 0.02, "learning_rate": 0.0009886841254445522, "loss": 1.3418, "theoretical_loss": 3.478526347806177, "tokens_seen": 1696727040 }, { "epoch": 0.02, "learning_rate": 0.000988360814742968, "loss": 1.365, "theoretical_loss": 3.4784338218798467, "tokens_seen": 1697251328 }, { "epoch": 0.02, "objective/train/advantage_avg": 0.017883040010929108, "objective/train/docs_used": 961828, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.691891670227051, "objective/train/original_loss": 2.69189190864563, "objective/train/theoretical_loss": 3.478364451440343, "objective/train/tokens_used": 56503776, "objective/train/value_avg": -0.0267333984375, "objective/train/value_loss": 0.0029996109660714865, "objective/train/value_max": -0.0002892017364501953, "objective/train/value_min": -0.2406005859375, "objective/train/value_reward_corr": 0.07037447313687664, "objective/train/value_std": 0.0196533203125, "objective/train/weight_avg": 1.0018031597137451, "objective/train/weighted_lm_loss": 2.6986746788024902, "objective/train/weights_max": 1.0174636840820312, "objective/train/weights_min": 0.9180039167404175, "theoretical_loss": 3.478364451440343, "tokens_seen": 1697644544 }, { "epoch": 0.02, "learning_rate": 0.000988037504041384, "loss": 1.3154, "theoretical_loss": 3.478341332530789, "tokens_seen": 1697775616 }, { "epoch": 0.02, "learning_rate": 0.0009877141933397997, "loss": 1.3628, "theoretical_loss": 3.4782488797332567, "tokens_seen": 1698299904 }, { "epoch": 0.02, "learning_rate": 0.0009873908826382152, "loss": 1.3178, "theoretical_loss": 3.478156463461527, "tokens_seen": 1698824192 }, { "epoch": 0.02, "objective/train/advantage_avg": 0.026605570688843727, "objective/train/docs_used": 963095, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.959935188293457, "objective/train/original_loss": 2.959935188293457, "objective/train/theoretical_loss": 3.4780756291661343, "objective/train/tokens_used": 58142176, "objective/train/value_avg": -0.033203125, "objective/train/value_loss": 0.0014631144003942609, "objective/train/value_max": -0.0001596212387084961, "objective/train/value_min": -0.1910400390625, "objective/train/value_reward_corr": 0.06681558576086435, "objective/train/value_std": 0.0236053466796875, "objective/train/weight_avg": 1.0026679039001465, "objective/train/weighted_lm_loss": 2.9711110591888428, "objective/train/weights_max": 1.0188170671463013, "objective/train/weights_min": 0.9844861030578613, "theoretical_loss": 3.4780756291661343, "tokens_seen": 1699282944 }, { "epoch": 0.02, "learning_rate": 0.0009870675719366312, "loss": 1.3122, "theoretical_loss": 3.478064083689903, "tokens_seen": 1699348480 }, { "epoch": 0.02, "learning_rate": 0.000986744261235047, "loss": 1.3479, "theoretical_loss": 3.4779717403927153, "tokens_seen": 1699872768 }, { "epoch": 0.02, "learning_rate": 0.0009864209505334627, "loss": 1.3555, "theoretical_loss": 3.4778794335443193, "tokens_seen": 1700397056 }, { "epoch": 0.02, "objective/train/advantage_avg": 0.0005425201379694045, "objective/train/docs_used": 963811, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.608184337615967, "objective/train/original_loss": 2.6081840991973877, "objective/train/theoretical_loss": 3.477787163119097, "objective/train/tokens_used": 59780576, "objective/train/value_avg": -0.0284423828125, "objective/train/value_loss": 0.012524149380624294, "objective/train/value_max": -0.0004189014434814453, "objective/train/value_min": -0.20166015625, "objective/train/value_reward_corr": 0.21947548105355605, "objective/train/value_std": 0.0215301513671875, "objective/train/weight_avg": 1.0001157522201538, "objective/train/weighted_lm_loss": 2.609344720840454, "objective/train/weights_max": 1.0203118324279785, "objective/train/weights_min": 0.9097063541412354, "theoretical_loss": 3.477787163119097, "tokens_seen": 1700921344 }, { "epoch": 0.02, "learning_rate": 0.0009860976398318785, "loss": 1.3784, "theoretical_loss": 3.477787163119097, "tokens_seen": 1700921344 }, { "epoch": 0.02, "learning_rate": 0.0009857743291302942, "loss": 1.3493, "theoretical_loss": 3.477694929091455, "tokens_seen": 1701445632 }, { "epoch": 0.02, "learning_rate": 0.00098545101842871, "loss": 1.3214, "theoretical_loss": 3.4776027314358267, "tokens_seen": 1701969920 }, { "epoch": 0.02, "learning_rate": 0.0009851277077271257, "loss": 1.3543, "theoretical_loss": 3.477510570126672, "tokens_seen": 1702494208 }, { "epoch": 0.02, "objective/train/advantage_avg": 0.017675327137112617, "objective/train/docs_used": 965136, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 3.0804636478424072, "objective/train/original_loss": 3.0804636478424072, "objective/train/theoretical_loss": 3.477499052517368, "objective/train/tokens_used": 61418976, "objective/train/value_avg": -0.02923583984375, "objective/train/value_loss": 0.005248533096164465, "objective/train/value_max": -0.0010528564453125, "objective/train/value_min": -0.1680908203125, "objective/train/value_reward_corr": 0.05992045680318433, "objective/train/value_std": 0.0195770263671875, "objective/train/weight_avg": 1.001793384552002, "objective/train/weighted_lm_loss": 3.0879435539245605, "objective/train/weights_max": 1.0168917179107666, "objective/train/weights_min": 0.9138116240501404, "theoretical_loss": 3.477499052517368, "tokens_seen": 1702559744 }, { "epoch": 0.03, "learning_rate": 0.0009848043970255415, "loss": 1.3699, "theoretical_loss": 3.477418445138476, "tokens_seen": 1703018496 }, { "epoch": 0.03, "learning_rate": 0.0009844810863239575, "loss": 1.333, "theoretical_loss": 3.477326356445749, "tokens_seen": 1703542784 }, { "epoch": 0.03, "learning_rate": 0.000984157775622373, "loss": 1.3266, "theoretical_loss": 3.477234304023027, "tokens_seen": 1704067072 }, { "epoch": 0.03, "objective/train/advantage_avg": 0.01890840381383896, "objective/train/docs_used": 965825, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.5166332721710205, "objective/train/original_loss": 2.5166330337524414, "objective/train/theoretical_loss": 3.4772112965815536, "objective/train/tokens_used": 63057376, "objective/train/value_avg": -0.026947021484375, "objective/train/value_loss": 0.003837679047137499, "objective/train/value_max": -0.0006289482116699219, "objective/train/value_min": -0.1676025390625, "objective/train/value_reward_corr": 0.04894281467788025, "objective/train/value_std": 0.0195465087890625, "objective/train/weight_avg": 1.0019097328186035, "objective/train/weighted_lm_loss": 2.5229299068450928, "objective/train/weights_max": 1.0168125629425049, "objective/train/weights_min": 0.9088606238365173, "theoretical_loss": 3.4772112965815536, "tokens_seen": 1704198144 }, { "epoch": 0.03, "learning_rate": 0.0009838344649207888, "loss": 1.3739, "theoretical_loss": 3.477142287844873, "tokens_seen": 1704591360 }, { "epoch": 0.03, "learning_rate": 0.0009835111542192047, "loss": 1.3615, "theoretical_loss": 3.4770503078858743, "tokens_seen": 1705115648 }, { "epoch": 0.03, "learning_rate": 0.0009831878435176205, "loss": 1.3433, "theoretical_loss": 3.4769583641206445, "tokens_seen": 1705639936 }, { "epoch": 0.03, "objective/train/advantage_avg": 0.01988118328154087, "objective/train/docs_used": 966739, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.4096128940582275, "objective/train/original_loss": 2.4096124172210693, "objective/train/theoretical_loss": 3.4769238945347127, "objective/train/tokens_used": 64695776, "objective/train/value_avg": -0.0241546630859375, "objective/train/value_loss": 0.0013872660929337144, "objective/train/value_max": -0.0007295608520507812, "objective/train/value_min": -0.151611328125, "objective/train/value_reward_corr": 0.08798634818857605, "objective/train/value_std": 0.0178680419921875, "objective/train/weight_avg": 1.0019949674606323, "objective/train/weighted_lm_loss": 2.4164247512817383, "objective/train/weights_max": 1.015211582183838, "objective/train/weights_min": 0.9323630332946777, "theoretical_loss": 3.4769238945347127, "tokens_seen": 1705836544 }, { "epoch": 0.03, "learning_rate": 0.0009828645328160363, "loss": 1.3627, "theoretical_loss": 3.4768664565238234, "tokens_seen": 1706164224 }, { "epoch": 0.03, "learning_rate": 0.000982541222114452, "loss": 1.3246, "theoretical_loss": 3.476774585070074, "tokens_seen": 1706688512 }, { "epoch": 0.03, "learning_rate": 0.0009822179114128678, "loss": 1.2822, "theoretical_loss": 3.4766827497340875, "tokens_seen": 1707212800 }, { "epoch": 0.03, "objective/train/advantage_avg": 0.018486149609088898, "objective/train/docs_used": 967356, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.465986967086792, "objective/train/original_loss": 2.465986967086792, "objective/train/theoretical_loss": 3.476636845602353, "objective/train/tokens_used": 66334176, "objective/train/value_avg": -0.02655029296875, "objective/train/value_loss": 0.0022359928116202354, "objective/train/value_max": -0.0004954338073730469, "objective/train/value_min": -0.15185546875, "objective/train/value_reward_corr": 0.04463228075391548, "objective/train/value_std": 0.0194549560546875, "objective/train/weight_avg": 1.0018596649169922, "objective/train/weighted_lm_loss": 2.472989082336426, "objective/train/weights_max": 1.0152376890182495, "objective/train/weights_min": 0.9229694604873657, "theoretical_loss": 3.476636845602353, "tokens_seen": 1707474944 }, { "epoch": 0.03, "learning_rate": 0.0009818946007112835, "loss": 1.316, "theoretical_loss": 3.4765909504905794, "tokens_seen": 1707737088 }, { "epoch": 0.03, "learning_rate": 0.0009815712900096993, "loss": 1.3532, "theoretical_loss": 3.4764991873142908, "tokens_seen": 1708261376 }, { "epoch": 0.03, "learning_rate": 0.0009812479793081153, "loss": 1.3369, "theoretical_loss": 3.4764074601799875, "tokens_seen": 1708785664 }, { "epoch": 0.03, "objective/train/advantage_avg": -0.0037214274052530527, "objective/train/docs_used": 968332, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.312995433807373, "objective/train/original_loss": 2.312994956970215, "objective/train/theoretical_loss": 3.4763501490124176, "objective/train/tokens_used": 67972576, "objective/train/value_avg": -0.0271453857421875, "objective/train/value_loss": 0.009048771113157272, "objective/train/value_max": -0.0008358955383300781, "objective/train/value_min": -0.16845703125, "objective/train/value_reward_corr": 0.08193887699106578, "objective/train/value_std": 0.021087646484375, "objective/train/weight_avg": 0.9996722340583801, "objective/train/weighted_lm_loss": 2.3140177726745605, "objective/train/weights_max": 1.0169222354888916, "objective/train/weights_min": 0.908571720123291, "theoretical_loss": 3.4763501490124176, "tokens_seen": 1709113344 }, { "epoch": 0.03, "learning_rate": 0.0009809246686065308, "loss": 1.3393, "theoretical_loss": 3.476315769062462, "tokens_seen": 1709309952 }, { "epoch": 0.03, "learning_rate": 0.0009806013579049466, "loss": 1.3314, "theoretical_loss": 3.476224113936532, "tokens_seen": 1709834240 }, { "epoch": 0.03, "learning_rate": 0.0009802780472033625, "loss": 1.2981, "theoretical_loss": 3.476132494777039, "tokens_seen": 1710358528 }, { "epoch": 0.03, "objective/train/advantage_avg": 0.013306492008268833, "objective/train/docs_used": 968886, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.6279025077819824, "objective/train/original_loss": 2.6279022693634033, "objective/train/theoretical_loss": 3.4760638039952747, "objective/train/tokens_used": 69610976, "objective/train/value_avg": -0.02740478515625, "objective/train/value_loss": 0.004024366848170757, "objective/train/value_max": -0.0005908012390136719, "objective/train/value_min": -0.2108154296875, "objective/train/value_reward_corr": 0.03704522956658486, "objective/train/value_std": 0.0191497802734375, "objective/train/weight_avg": 1.0013505220413208, "objective/train/weighted_lm_loss": 2.6346938610076904, "objective/train/weights_max": 1.0176552534103394, "objective/train/weights_min": 0.9210671782493591, "theoretical_loss": 3.4760638039952747, "tokens_seen": 1710751744 }, { "epoch": 0.03, "learning_rate": 0.0009799547365017783, "loss": 1.3383, "theoretical_loss": 3.4760409115588518, "tokens_seen": 1710882816 }, { "epoch": 0.03, "learning_rate": 0.000979631425800194, "loss": 1.3647, "theoretical_loss": 3.4759493642568624, "tokens_seen": 1711407104 }, { "epoch": 0.03, "learning_rate": 0.0009793081150986098, "loss": 1.339, "theoretical_loss": 3.47585785284599, "tokens_seen": 1711931392 }, { "epoch": 0.03, "objective/train/advantage_avg": -0.005731384269893169, "objective/train/docs_used": 970327, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.9252495765686035, "objective/train/original_loss": 2.9252493381500244, "objective/train/theoretical_loss": 3.4757778097837098, "objective/train/tokens_used": 71249376, "objective/train/value_avg": -0.03277587890625, "objective/train/value_loss": 0.023942546918988228, "objective/train/value_max": -0.0011157989501953125, "objective/train/value_min": -0.306396484375, "objective/train/value_reward_corr": 0.29451104917419524, "objective/train/value_std": 0.02264404296875, "objective/train/weight_avg": 0.9995435476303101, "objective/train/weighted_lm_loss": 2.9249048233032227, "objective/train/weights_max": 1.0181078910827637, "objective/train/weights_min": 0.907359778881073, "theoretical_loss": 3.4757778097837098, "tokens_seen": 1712390144 }, { "epoch": 0.03, "learning_rate": 0.0009789848043970256, "loss": 1.3297, "theoretical_loss": 3.4757663773011775, "tokens_seen": 1712455680 }, { "epoch": 0.03, "learning_rate": 0.0009786614936954413, "loss": 1.3603, "theoretical_loss": 3.475674937597394, "tokens_seen": 1712979968 }, { "epoch": 0.03, "learning_rate": 0.000978338182993857, "loss": 1.3131, "theoretical_loss": 3.4755835337096332, "tokens_seen": 1713504256 }, { "epoch": 0.03, "objective/train/advantage_avg": 0.011326815001666546, "objective/train/docs_used": 971016, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 3.2406866550445557, "objective/train/original_loss": 3.2406868934631348, "objective/train/theoretical_loss": 3.4754921656129145, "objective/train/tokens_used": 72887776, "objective/train/value_avg": -0.03466796875, "objective/train/value_loss": 0.006263550370931625, "objective/train/value_max": -0.001674652099609375, "objective/train/value_min": -0.2763671875, "objective/train/value_reward_corr": 0.461638236475919, "objective/train/value_std": 0.026153564453125, "objective/train/weight_avg": 1.0011637210845947, "objective/train/weighted_lm_loss": 3.245138168334961, "objective/train/weights_max": 1.0188380479812622, "objective/train/weights_min": 0.9322776794433594, "theoretical_loss": 3.4754921656129145, "tokens_seen": 1714028544 }, { "epoch": 0.03, "learning_rate": 0.0009780148722922728, "loss": 1.3853, "theoretical_loss": 3.4754921656129145, "tokens_seen": 1714028544 }, { "epoch": 0.03, "learning_rate": 0.0009776915615906888, "loss": 1.3515, "theoretical_loss": 3.4754008332822806, "tokens_seen": 1714552832 }, { "epoch": 0.03, "learning_rate": 0.0009773682508891044, "loss": 1.3348, "theoretical_loss": 3.4753095366928015, "tokens_seen": 1715077120 }, { "epoch": 0.03, "learning_rate": 0.0009770449401875201, "loss": 1.3309, "theoretical_loss": 3.4752182758195707, "tokens_seen": 1715601408 }, { "epoch": 0.03, "objective/train/advantage_avg": 0.014903021045029163, "objective/train/docs_used": 972281, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.32625150680542, "objective/train/original_loss": 2.3262507915496826, "objective/train/theoretical_loss": 3.475206870720476, "objective/train/tokens_used": 74526176, "objective/train/value_avg": -0.0233001708984375, "objective/train/value_loss": 0.0031084155198186636, "objective/train/value_max": -0.00027370452880859375, "objective/train/value_min": -0.173583984375, "objective/train/value_reward_corr": 0.07831338745798908, "objective/train/value_std": 0.0177001953125, "objective/train/weight_avg": 1.0015054941177368, "objective/train/weighted_lm_loss": 2.331597089767456, "objective/train/weights_max": 1.0174483060836792, "objective/train/weights_min": 0.9141354560852051, "theoretical_loss": 3.475206870720476, "tokens_seen": 1715666944 }, { "epoch": 0.03, "learning_rate": 0.000976721629485936, "loss": 1.3052, "theoretical_loss": 3.475127050637707, "tokens_seen": 1716125696 }, { "epoch": 0.03, "learning_rate": 0.0009763983187843518, "loss": 1.3298, "theoretical_loss": 3.475035861122355, "tokens_seen": 1716649984 }, { "epoch": 0.03, "learning_rate": 0.0009760750080827676, "loss": 1.3562, "theoretical_loss": 3.4749447072486825, "tokens_seen": 1717174272 }, { "epoch": 0.03, "objective/train/advantage_avg": 0.006919887848198414, "objective/train/docs_used": 972817, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.8087337017059326, "objective/train/original_loss": 2.808734178543091, "objective/train/theoretical_loss": 3.4749219243463694, "objective/train/tokens_used": 76164576, "objective/train/value_avg": -0.03240966796875, "objective/train/value_loss": 0.014523343183100224, "objective/train/value_max": -0.0007948875427246094, "objective/train/value_min": -0.2354736328125, "objective/train/value_reward_corr": 0.25943139773616125, "objective/train/value_std": 0.02313232421875, "objective/train/weight_avg": 1.0007628202438354, "objective/train/weighted_lm_loss": 2.8116960525512695, "objective/train/weights_max": 1.0180928707122803, "objective/train/weights_min": 0.9065375924110413, "theoretical_loss": 3.4749219243463694, "tokens_seen": 1717305344 }, { "epoch": 0.03, "learning_rate": 0.0009757516973811833, "loss": 1.3551, "theoretical_loss": 3.4748535889918837, "tokens_seen": 1717698560 }, { "epoch": 0.03, "learning_rate": 0.0009754283866795991, "loss": 1.294, "theoretical_loss": 3.4747625063271768, "tokens_seen": 1718222848 }, { "epoch": 0.03, "learning_rate": 0.0009751050759780149, "loss": 1.3373, "theoretical_loss": 3.474671459229805, "tokens_seen": 1718747136 }, { "epoch": 0.03, "objective/train/advantage_avg": 0.008510448038578033, "objective/train/docs_used": 973424, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.733546733856201, "objective/train/original_loss": 2.733546018600464, "objective/train/theoretical_loss": 3.474637325732946, "objective/train/tokens_used": 77802976, "objective/train/value_avg": -0.0288848876953125, "objective/train/value_loss": 0.008285729214549065, "objective/train/value_max": -0.0013723373413085938, "objective/train/value_min": -0.267822265625, "objective/train/value_reward_corr": 0.24347285110038142, "objective/train/value_std": 0.023223876953125, "objective/train/weight_avg": 1.0008916854858398, "objective/train/weighted_lm_loss": 2.7370059490203857, "objective/train/weights_max": 1.018381953239441, "objective/train/weights_min": 0.9088394045829773, "theoretical_loss": 3.474637325732946, "tokens_seen": 1718943744 }, { "epoch": 0.04, "learning_rate": 0.0009747817652764307, "loss": 1.3299, "theoretical_loss": 3.4745804476750366, "tokens_seen": 1719271424 }, { "epoch": 0.04, "learning_rate": 0.0009744584545748465, "loss": 1.3119, "theoretical_loss": 3.4744894716381642, "tokens_seen": 1719795712 }, { "epoch": 0.04, "learning_rate": 0.0009741351438732622, "loss": 1.2732, "theoretical_loss": 3.4743985310945047, "tokens_seen": 1720320000 }, { "epoch": 0.04, "objective/train/advantage_avg": 0.020797133445739746, "objective/train/docs_used": 974650, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.8513402938842773, "objective/train/original_loss": 2.8513402938842773, "objective/train/theoretical_loss": 3.474353074124924, "objective/train/tokens_used": 79441376, "objective/train/value_avg": -0.028900146484375, "objective/train/value_loss": 0.0015088863437995315, "objective/train/value_max": -0.0011739730834960938, "objective/train/value_min": -0.1820068359375, "objective/train/value_reward_corr": 0.01949447297119301, "objective/train/value_std": 0.020538330078125, "objective/train/weight_avg": 1.0020872354507446, "objective/train/weighted_lm_loss": 2.8601443767547607, "objective/train/weights_max": 1.017999291419983, "objective/train/weights_min": 0.9089750647544861, "theoretical_loss": 3.474353074124924, "tokens_seen": 1720582144 }, { "epoch": 0.04, "learning_rate": 0.000973811833171678, "loss": 1.2955, "theoretical_loss": 3.474307626019401, "tokens_seen": 1720844288 }, { "epoch": 0.04, "learning_rate": 0.0009734885224700938, "loss": 1.2981, "theoretical_loss": 3.474216756388219, "tokens_seen": 1721368576 }, { "epoch": 0.04, "learning_rate": 0.0009731652117685095, "loss": 1.333, "theoretical_loss": 3.4741259221763507, "tokens_seen": 1721892864 }, { "epoch": 0.04, "objective/train/advantage_avg": 0.01570254936814308, "objective/train/docs_used": 975130, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.5209832191467285, "objective/train/original_loss": 2.5209832191467285, "objective/train/theoretical_loss": 3.474069168769379, "objective/train/tokens_used": 81079776, "objective/train/value_avg": -0.0268096923828125, "objective/train/value_loss": 0.0036664637736976147, "objective/train/value_max": -0.0008106231689453125, "objective/train/value_min": -0.40673828125, "objective/train/value_reward_corr": 0.21495503656367101, "objective/train/value_std": 0.01904296875, "objective/train/weight_avg": 1.0015883445739746, "objective/train/weighted_lm_loss": 2.5272328853607178, "objective/train/weights_max": 1.0142992734909058, "objective/train/weights_min": 0.9134161472320557, "theoretical_loss": 3.474069168769379, "tokens_seen": 1722220544 }, { "epoch": 0.04, "learning_rate": 0.0009728419010669254, "loss": 1.329, "theoretical_loss": 3.474035123359212, "tokens_seen": 1722417152 }, { "epoch": 0.04, "learning_rate": 0.000972518590365341, "loss": 1.3074, "theoretical_loss": 3.473944359912243, "tokens_seen": 1722941440 }, { "epoch": 0.04, "learning_rate": 0.0009721952796637569, "loss": 1.3431, "theoretical_loss": 3.4738536318109086, "tokens_seen": 1723465728 }, { "epoch": 0.04, "objective/train/advantage_avg": 0.014603628776967525, "objective/train/docs_used": 976352, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.2475247383117676, "objective/train/original_loss": 2.2475247383117676, "objective/train/theoretical_loss": 3.4737856089157355, "objective/train/tokens_used": 82718176, "objective/train/value_avg": -0.0263214111328125, "objective/train/value_loss": 0.003276633331552148, "objective/train/value_max": -0.0010480880737304688, "objective/train/value_min": -0.14306640625, "objective/train/value_reward_corr": 0.05448887319653048, "objective/train/value_std": 0.0183258056640625, "objective/train/weight_avg": 1.001476526260376, "objective/train/weighted_lm_loss": 2.252713680267334, "objective/train/weights_max": 1.013642430305481, "objective/train/weights_min": 0.9229938983917236, "theoretical_loss": 3.4737856089157355, "tokens_seen": 1723858944 }, { "epoch": 0.04, "learning_rate": 0.0009718719689621727, "loss": 1.3038, "theoretical_loss": 3.4737629390306988, "tokens_seen": 1723990016 }, { "epoch": 0.04, "learning_rate": 0.0009715486582605884, "loss": 1.3162, "theoretical_loss": 3.473672281547127, "tokens_seen": 1724514304 }, { "epoch": 0.04, "learning_rate": 0.0009712253475590043, "loss": 1.3448, "theoretical_loss": 3.4735816593357316, "tokens_seen": 1725038592 }, { "epoch": 0.04, "objective/train/advantage_avg": 0.016449104994535446, "objective/train/docs_used": 977030, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.2850451469421387, "objective/train/original_loss": 2.2850453853607178, "objective/train/theoretical_loss": 3.4735023938157563, "objective/train/tokens_used": 84356576, "objective/train/value_avg": -0.0244140625, "objective/train/value_loss": 0.0015009441412985325, "objective/train/value_max": -0.0004494190216064453, "objective/train/value_min": -0.203369140625, "objective/train/value_reward_corr": 0.06388559856583752, "objective/train/value_std": 0.0210723876953125, "objective/train/weight_avg": 1.001652479171753, "objective/train/weighted_lm_loss": 2.291083574295044, "objective/train/weights_max": 1.0201483964920044, "objective/train/weights_min": 0.9627787470817566, "theoretical_loss": 3.4735023938157563, "tokens_seen": 1725497344 }, { "epoch": 0.04, "learning_rate": 0.0009709020368574199, "loss": 1.3128, "theoretical_loss": 3.4734910723720755, "tokens_seen": 1725562880 }, { "epoch": 0.04, "learning_rate": 0.0009705787261558358, "loss": 1.3039, "theoretical_loss": 3.473400520631745, "tokens_seen": 1726087168 }, { "epoch": 0.04, "learning_rate": 0.0009702554154542516, "loss": 1.3136, "theoretical_loss": 3.473310004090352, "tokens_seen": 1726611456 }, { "epoch": 0.04, "objective/train/advantage_avg": 0.0006863871240057051, "objective/train/docs_used": 978106, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.6131508350372314, "objective/train/original_loss": 2.6131508350372314, "objective/train/theoretical_loss": 3.4732195227235323, "objective/train/tokens_used": 85994976, "objective/train/value_avg": -0.0252685546875, "objective/train/value_loss": 0.007869298569858074, "objective/train/value_max": -0.0012693405151367188, "objective/train/value_min": -0.1705322265625, "objective/train/value_reward_corr": 0.3343813652076151, "objective/train/value_std": 0.018951416015625, "objective/train/weight_avg": 1.0001074075698853, "objective/train/weighted_lm_loss": 2.6153154373168945, "objective/train/weights_max": 1.0145231485366821, "objective/train/weights_min": 0.917260468006134, "theoretical_loss": 3.4732195227235323, "tokens_seen": 1727135744 }, { "epoch": 0.04, "learning_rate": 0.0009699321047526673, "loss": 1.3211, "theoretical_loss": 3.4732195227235323, "tokens_seen": 1727135744 }, { "epoch": 0.04, "learning_rate": 0.0009696087940510832, "loss": 1.3405, "theoretical_loss": 3.473129076506946, "tokens_seen": 1727660032 }, { "epoch": 0.04, "learning_rate": 0.0009692854833494988, "loss": 1.3216, "theoretical_loss": 3.4730386654162757, "tokens_seen": 1728184320 }, { "epoch": 0.04, "learning_rate": 0.0009689621726479146, "loss": 1.351, "theoretical_loss": 3.472948289427231, "tokens_seen": 1728708608 }, { "epoch": 0.04, "objective/train/advantage_avg": 0.014950585551559925, "objective/train/docs_used": 978835, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.8975865840911865, "objective/train/original_loss": 2.8975868225097656, "objective/train/theoretical_loss": 3.472936994895475, "objective/train/tokens_used": 87633376, "objective/train/value_avg": -0.0345458984375, "objective/train/value_loss": 0.0028118949849158525, "objective/train/value_max": -0.0018243789672851562, "objective/train/value_min": -0.2044677734375, "objective/train/value_reward_corr": 0.4282707496256444, "objective/train/value_std": 0.02734375, "objective/train/weight_avg": 1.0015089511871338, "objective/train/weighted_lm_loss": 2.9034531116485596, "objective/train/weights_max": 1.0160865783691406, "objective/train/weights_min": 0.9463914632797241, "theoretical_loss": 3.472936994895475, "tokens_seen": 1728774144 }, { "epoch": 0.04, "learning_rate": 0.0009686388619463305, "loss": 1.3249, "theoretical_loss": 3.4728579485155446, "tokens_seen": 1729232896 }, { "epoch": 0.04, "learning_rate": 0.0009683155512447462, "loss": 1.322, "theoretical_loss": 3.4727676426569722, "tokens_seen": 1729757184 }, { "epoch": 0.04, "learning_rate": 0.0009679922405431621, "loss": 1.3437, "theoretical_loss": 3.472677371827295, "tokens_seen": 1730281472 }, { "epoch": 0.04, "objective/train/advantage_avg": 0.006407979875802994, "objective/train/docs_used": 979527, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.416654586791992, "objective/train/original_loss": 2.4166548252105713, "objective/train/theoretical_loss": 3.4726548095903045, "objective/train/tokens_used": 89271776, "objective/train/value_avg": -0.027099609375, "objective/train/value_loss": 0.003993797581642866, "objective/train/value_max": -0.0007886886596679688, "objective/train/value_min": -0.191162109375, "objective/train/value_reward_corr": 0.07296300309640055, "objective/train/value_std": 0.02099609375, "objective/train/weight_avg": 1.0006606578826904, "objective/train/weighted_lm_loss": 2.4212000370025635, "objective/train/weights_max": 1.0172492265701294, "objective/train/weights_min": 0.9332390427589417, "theoretical_loss": 3.4726548095903045, "tokens_seen": 1730412544 }, { "epoch": 0.04, "learning_rate": 0.0009676689298415777, "loss": 1.3439, "theoretical_loss": 3.4725871360023177, "tokens_seen": 1730805760 }, { "epoch": 0.04, "learning_rate": 0.0009673456191399935, "loss": 1.3334, "theoretical_loss": 3.472496935157869, "tokens_seen": 1731330048 }, { "epoch": 0.04, "learning_rate": 0.0009670223084384094, "loss": 1.3175, "theoretical_loss": 3.472406769269801, "tokens_seen": 1731854336 }, { "epoch": 0.04, "objective/train/advantage_avg": -0.005575783085078001, "objective/train/docs_used": 980288, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.8826539516448975, "objective/train/original_loss": 2.8826537132263184, "objective/train/theoretical_loss": 3.4723729660690434, "objective/train/tokens_used": 90910176, "objective/train/value_avg": -0.0294952392578125, "objective/train/value_loss": 0.012900675646960735, "objective/train/value_max": -0.0006165504455566406, "objective/train/value_min": -0.27099609375, "objective/train/value_reward_corr": 0.42979710121399617, "objective/train/value_std": 0.02203369140625, "objective/train/weight_avg": 0.9995061159133911, "objective/train/weighted_lm_loss": 2.879244089126587, "objective/train/weights_max": 1.0274101495742798, "objective/train/weights_min": 0.9161306023597717, "theoretical_loss": 3.4723729660690434, "tokens_seen": 1732050944 }, { "epoch": 0.04, "learning_rate": 0.0009666989977368251, "loss": 1.3746, "theoretical_loss": 3.472316638313991, "tokens_seen": 1732378624 }, { "epoch": 0.04, "learning_rate": 0.0009663756870352409, "loss": 1.378, "theoretical_loss": 3.4722265422663408, "tokens_seen": 1732902912 }, { "epoch": 0.04, "learning_rate": 0.0009660523763336566, "loss": 1.3738, "theoretical_loss": 3.4721364811027735, "tokens_seen": 1733427200 }, { "epoch": 0.04, "objective/train/advantage_avg": 0.02132727950811386, "objective/train/docs_used": 981887, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.7392237186431885, "objective/train/original_loss": 2.7392232418060303, "objective/train/theoretical_loss": 3.472091463595004, "objective/train/tokens_used": 92548576, "objective/train/value_avg": -0.0267333984375, "objective/train/value_loss": 0.0011351255234330893, "objective/train/value_max": -0.0006093978881835938, "objective/train/value_min": -0.1737060546875, "objective/train/value_reward_corr": 0.0868598749789922, "objective/train/value_std": 0.0183563232421875, "objective/train/weight_avg": 1.0021384954452515, "objective/train/weighted_lm_loss": 2.747075080871582, "objective/train/weights_max": 1.016874074935913, "objective/train/weights_min": 0.9729582071304321, "theoretical_loss": 3.472091463595004, "tokens_seen": 1733689344 }, { "epoch": 0.04, "learning_rate": 0.0009657290656320724, "loss": 1.3259, "theoretical_loss": 3.472046454799238, "tokens_seen": 1733951488 }, { "epoch": 0.04, "learning_rate": 0.0009654057549304883, "loss": 1.3284, "theoretical_loss": 3.4719564633317064, "tokens_seen": 1734475776 }, { "epoch": 0.04, "learning_rate": 0.000965082444228904, "loss": 1.316, "theoretical_loss": 3.4718665066761756, "tokens_seen": 1735000064 }, { "epoch": 0.04, "objective/train/advantage_avg": -0.02310173586010933, "objective/train/docs_used": 982466, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.609210252761841, "objective/train/original_loss": 2.6092100143432617, "objective/train/theoretical_loss": 3.4718103014337816, "objective/train/tokens_used": 94186976, "objective/train/value_avg": -0.0273895263671875, "objective/train/value_loss": 0.025188777595758438, "objective/train/value_max": -0.0016870498657226562, "objective/train/value_min": -0.234375, "objective/train/value_reward_corr": 0.22316131074334103, "objective/train/value_std": 0.022308349609375, "objective/train/weight_avg": 0.99781334400177, "objective/train/weighted_lm_loss": 2.610234022140503, "objective/train/weights_max": 1.0207655429840088, "objective/train/weights_min": 0.908212423324585, "theoretical_loss": 3.4718103014337816, "tokens_seen": 1735327744 }, { "epoch": 0.05, "learning_rate": 0.0009647591335273198, "loss": 1.312, "theoretical_loss": 3.471776584808665, "tokens_seen": 1735524352 }, { "epoch": 0.05, "learning_rate": 0.0009644358228257355, "loss": 1.3511, "theoretical_loss": 3.471686697705218, "tokens_seen": 1736048640 }, { "epoch": 0.05, "learning_rate": 0.0009641125121241513, "loss": 1.3571, "theoretical_loss": 3.471596845341903, "tokens_seen": 1736572928 }, { "epoch": 0.05, "objective/train/advantage_avg": 0.0122290113940835, "objective/train/docs_used": 983872, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.4851791858673096, "objective/train/original_loss": 2.4851789474487305, "objective/train/theoretical_loss": 3.4715294788532445, "objective/train/tokens_used": 95825376, "objective/train/value_avg": -0.03466796875, "objective/train/value_loss": 0.006056983023881912, "objective/train/value_max": -0.0018310546875, "objective/train/value_min": -0.319091796875, "objective/train/value_reward_corr": 0.398271622174376, "objective/train/value_std": 0.036651611328125, "objective/train/weight_avg": 1.0012527704238892, "objective/train/weighted_lm_loss": 2.490361452102661, "objective/train/weights_max": 1.026105284690857, "objective/train/weights_min": 0.9247947931289673, "theoretical_loss": 3.4715294788532445, "tokens_seen": 1736966144 }, { "epoch": 0.05, "learning_rate": 0.000963789201422567, "loss": 1.3389, "theoretical_loss": 3.4715070276948103, "tokens_seen": 1737097216 }, { "epoch": 0.05, "learning_rate": 0.0009634658907209829, "loss": 1.3411, "theoretical_loss": 3.471417244740055, "tokens_seen": 1737621504 }, { "epoch": 0.05, "learning_rate": 0.0009631425800193987, "loss": 1.3767, "theoretical_loss": 3.4713274964537755, "tokens_seen": 1738145792 }, { "epoch": 0.05, "objective/train/advantage_avg": 0.0008282791823148727, "objective/train/docs_used": 984348, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.4274837970733643, "objective/train/original_loss": 2.427483558654785, "objective/train/theoretical_loss": 3.471248995123524, "objective/train/tokens_used": 97463776, "objective/train/value_avg": -0.030181884765625, "objective/train/value_loss": 0.008271356113255024, "objective/train/value_max": -0.0005359649658203125, "objective/train/value_min": -0.81494140625, "objective/train/value_reward_corr": 0.6681748882029085, "objective/train/value_std": 0.037017822265625, "objective/train/weight_avg": 1.0001235008239746, "objective/train/weighted_lm_loss": 2.4277076721191406, "objective/train/weights_max": 1.0270813703536987, "objective/train/weights_min": 0.9073331952095032, "theoretical_loss": 3.471248995123524, "tokens_seen": 1738604544 }, { "epoch": 0.05, "learning_rate": 0.0009628192693178145, "loss": 1.3146, "theoretical_loss": 3.4712377828121337, "tokens_seen": 1738670080 }, { "epoch": 0.05, "learning_rate": 0.0009624959586162302, "loss": 1.2929, "theoretical_loss": 3.471148103791315, "tokens_seen": 1739194368 }, { "epoch": 0.05, "learning_rate": 0.0009621726479146459, "loss": 1.3458, "theoretical_loss": 3.471058459367529, "tokens_seen": 1739718656 }, { "epoch": 0.05, "objective/train/advantage_avg": -0.007756249979138374, "objective/train/docs_used": 985100, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.6939330101013184, "objective/train/original_loss": 2.6939332485198975, "objective/train/theoretical_loss": 3.470968849517008, "objective/train/tokens_used": 99102176, "objective/train/value_avg": -0.026580810546875, "objective/train/value_loss": 0.011918692849576473, "objective/train/value_max": -0.00127410888671875, "objective/train/value_min": -0.1568603515625, "objective/train/value_reward_corr": -0.06532926168555772, "objective/train/value_std": 0.0182037353515625, "objective/train/weight_avg": 0.9992831349372864, "objective/train/weighted_lm_loss": 2.697981119155884, "objective/train/weights_max": 1.0148284435272217, "objective/train/weights_min": 0.9467129111289978, "theoretical_loss": 3.470968849517008, "tokens_seen": 1740242944 }, { "epoch": 0.05, "learning_rate": 0.0009618493372130618, "loss": 1.3133, "theoretical_loss": 3.470968849517008, "tokens_seen": 1740242944 }, { "epoch": 0.05, "learning_rate": 0.0009615260265114776, "loss": 1.3219, "theoretical_loss": 3.470879274216008, "tokens_seen": 1740767232 }, { "epoch": 0.05, "learning_rate": 0.0009612027158098934, "loss": 1.3012, "theoretical_loss": 3.4707897334408093, "tokens_seen": 1741291520 }, { "epoch": 0.05, "learning_rate": 0.0009608794051083091, "loss": 1.3444, "theoretical_loss": 3.4707002271677143, "tokens_seen": 1741815808 }, { "epoch": 0.05, "objective/train/advantage_avg": 0.006851123180240393, "objective/train/docs_used": 986266, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.8433356285095215, "objective/train/original_loss": 2.8433353900909424, "objective/train/theoretical_loss": 3.4706890413083276, "objective/train/tokens_used": 100740576, "objective/train/value_avg": -0.031097412109375, "objective/train/value_loss": 0.0044151716865599155, "objective/train/value_max": -0.00217437744140625, "objective/train/value_min": -0.35693359375, "objective/train/value_reward_corr": 0.19278831924771955, "objective/train/value_std": 0.02471923828125, "objective/train/weight_avg": 1.0007070302963257, "objective/train/weighted_lm_loss": 2.847325086593628, "objective/train/weights_max": 1.0359439849853516, "objective/train/weights_min": 0.947567343711853, "theoretical_loss": 3.4706890413083276, "tokens_seen": 1741881344 }, { "epoch": 0.05, "learning_rate": 0.0009605560944067248, "loss": 1.3659, "theoretical_loss": 3.4706107553730496, "tokens_seen": 1742340096 }, { "epoch": 0.05, "learning_rate": 0.0009602327837051407, "loss": 1.3185, "theoretical_loss": 3.470521318033165, "tokens_seen": 1742864384 }, { "epoch": 0.05, "learning_rate": 0.0009599094730035565, "loss": 1.2676, "theoretical_loss": 3.4704319151244327, "tokens_seen": 1743388672 }, { "epoch": 0.05, "objective/train/advantage_avg": 0.019733382388949394, "objective/train/docs_used": 986906, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.6569700241088867, "objective/train/original_loss": 2.6569700241088867, "objective/train/theoretical_loss": 3.4704095697743513, "objective/train/tokens_used": 102378976, "objective/train/value_avg": -0.025299072265625, "objective/train/value_loss": 0.0013134790351614356, "objective/train/value_max": -0.00131988525390625, "objective/train/value_min": -0.1795654296875, "objective/train/value_reward_corr": 0.13268251663275213, "objective/train/value_std": 0.0179443359375, "objective/train/weight_avg": 1.0019798278808594, "objective/train/weighted_lm_loss": 2.6639018058776855, "objective/train/weights_max": 1.0173629522323608, "objective/train/weights_min": 0.9430978298187256, "theoretical_loss": 3.4704095697743513, "tokens_seen": 1743519744 }, { "epoch": 0.05, "learning_rate": 0.0009595861623019722, "loss": 1.3246, "theoretical_loss": 3.4703425466232503, "tokens_seen": 1743912960 }, { "epoch": 0.05, "learning_rate": 0.000959262851600388, "loss": 1.3703, "theoretical_loss": 3.4702532125060372, "tokens_seen": 1744437248 }, { "epoch": 0.05, "learning_rate": 0.0009589395408988037, "loss": 1.3606, "theoretical_loss": 3.470163912749236, "tokens_seen": 1744961536 }, { "epoch": 0.05, "objective/train/advantage_avg": 0.01587463542819023, "objective/train/docs_used": 988319, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.586710214614868, "objective/train/original_loss": 2.586710214614868, "objective/train/theoretical_loss": 3.470130434194175, "objective/train/tokens_used": 104017376, "objective/train/value_avg": -0.025848388671875, "objective/train/value_loss": 0.0050507476553320885, "objective/train/value_max": -0.0008296966552734375, "objective/train/value_min": -0.53759765625, "objective/train/value_reward_corr": 0.35691556707510674, "objective/train/value_std": 0.0237884521484375, "objective/train/weight_avg": 1.0016123056411743, "objective/train/weighted_lm_loss": 2.5915367603302, "objective/train/weights_max": 1.027877688407898, "objective/train/weights_min": 0.9081292748451233, "theoretical_loss": 3.470130434194175, "tokens_seen": 1745158144 }, { "epoch": 0.05, "learning_rate": 0.0009586162301972196, "loss": 1.3033, "theoretical_loss": 3.4700746473293127, "tokens_seen": 1745485824 }, { "epoch": 0.05, "learning_rate": 0.0009582929194956354, "loss": 1.3144, "theoretical_loss": 3.469985416222757, "tokens_seen": 1746010112 }, { "epoch": 0.05, "learning_rate": 0.0009579696087940511, "loss": 1.2841, "theoretical_loss": 3.469896219406081, "tokens_seen": 1746534400 }, { "epoch": 0.05, "objective/train/advantage_avg": 0.01022767461836338, "objective/train/docs_used": 989102, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.2875752449035645, "objective/train/original_loss": 2.2875747680664062, "objective/train/theoretical_loss": 3.4698516338491143, "objective/train/tokens_used": 105655776, "objective/train/value_avg": -0.0274505615234375, "objective/train/value_loss": 0.002917420817539096, "objective/train/value_max": -0.0015010833740234375, "objective/train/value_min": -0.2193603515625, "objective/train/value_reward_corr": 0.47525705802944795, "objective/train/value_std": 0.025238037109375, "objective/train/weight_avg": 1.0010372400283813, "objective/train/weighted_lm_loss": 2.290987968444824, "objective/train/weights_max": 1.0181199312210083, "objective/train/weights_min": 0.9505501389503479, "theoretical_loss": 3.4698516338491143, "tokens_seen": 1746796544 }, { "epoch": 0.05, "learning_rate": 0.0009576462980924669, "loss": 1.3244, "theoretical_loss": 3.4698070568558204, "tokens_seen": 1747058688 }, { "epoch": 0.05, "learning_rate": 0.0009573229873908826, "loss": 1.3331, "theoretical_loss": 3.4697179285485333, "tokens_seen": 1747582976 }, { "epoch": 0.05, "learning_rate": 0.0009569996766892984, "loss": 1.3585, "theoretical_loss": 3.469628834460803, "tokens_seen": 1748107264 }, { "epoch": 0.05, "objective/train/advantage_avg": 0.01215823832899332, "objective/train/docs_used": 990185, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 1.9879776239395142, "objective/train/original_loss": 1.9879776239395142, "objective/train/theoretical_loss": 3.469573168022693, "objective/train/tokens_used": 107294176, "objective/train/value_avg": -0.0292205810546875, "objective/train/value_loss": 0.0035292685497552156, "objective/train/value_max": -0.0005273818969726562, "objective/train/value_min": -0.353271484375, "objective/train/value_reward_corr": 0.3202447000542465, "objective/train/value_std": 0.0297088623046875, "objective/train/weight_avg": 1.0012332201004028, "objective/train/weighted_lm_loss": 1.991782546043396, "objective/train/weights_max": 1.025057315826416, "objective/train/weights_min": 0.9216517210006714, "theoretical_loss": 3.469573168022693, "tokens_seen": 1748434944 }, { "epoch": 0.05, "learning_rate": 0.0009566763659877143, "loss": 1.3224, "theoretical_loss": 3.469539774569233, "tokens_seen": 1748631552 }, { "epoch": 0.05, "learning_rate": 0.00095635305528613, "loss": 1.3308, "theoretical_loss": 3.4694507488504502, "tokens_seen": 1749155840 }, { "epoch": 0.05, "learning_rate": 0.0009560297445845458, "loss": 1.3702, "theoretical_loss": 3.4693617572811073, "tokens_seen": 1749680128 }, { "epoch": 0.05, "objective/train/advantage_avg": 0.014837173745036125, "objective/train/docs_used": 990903, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.470926523208618, "objective/train/original_loss": 2.470926284790039, "objective/train/theoretical_loss": 3.4692950360006365, "objective/train/tokens_used": 108932576, "objective/train/value_avg": -0.034332275390625, "objective/train/value_loss": 0.005477494560182095, "objective/train/value_max": -0.0008592605590820312, "objective/train/value_min": -0.572265625, "objective/train/value_reward_corr": 0.3908080832456804, "objective/train/value_std": 0.0308380126953125, "objective/train/weight_avg": 1.001510739326477, "objective/train/weighted_lm_loss": 2.475952386856079, "objective/train/weights_max": 1.0263042449951172, "objective/train/weights_min": 0.919764518737793, "theoretical_loss": 3.4692950360006365, "tokens_seen": 1750073344 }, { "epoch": 0.05, "learning_rate": 0.0009557064338829615, "loss": 1.3337, "theoretical_loss": 3.469272799837877, "tokens_seen": 1750204416 }, { "epoch": 0.05, "learning_rate": 0.0009553831231813773, "loss": 1.3125, "theoretical_loss": 3.469183876497456, "tokens_seen": 1750728704 }, { "epoch": 0.05, "learning_rate": 0.0009550598124797932, "loss": 1.3383, "theoretical_loss": 3.4690949872365633, "tokens_seen": 1751252992 }, { "epoch": 0.05, "objective/train/advantage_avg": 0.012107845395803452, "objective/train/docs_used": 991641, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.2792959213256836, "objective/train/original_loss": 2.2792956829071045, "objective/train/theoretical_loss": 3.4690172370708616, "objective/train/tokens_used": 110570976, "objective/train/value_avg": -0.0232391357421875, "objective/train/value_loss": 0.0012372436467558146, "objective/train/value_max": -0.0007524490356445312, "objective/train/value_min": -0.1776123046875, "objective/train/value_reward_corr": 0.037915383377116335, "objective/train/value_std": 0.0171661376953125, "objective/train/weight_avg": 1.001217007637024, "objective/train/weighted_lm_loss": 2.2854347229003906, "objective/train/weights_max": 1.0155538320541382, "objective/train/weights_min": 0.9400930404663086, "theoretical_loss": 3.4690172370708616, "tokens_seen": 1751711744 }, { "epoch": 0.06, "learning_rate": 0.0009547365017782089, "loss": 1.2858, "theoretical_loss": 3.469006132031942, "tokens_seen": 1751777280 }, { "epoch": 0.06, "learning_rate": 0.0009544131910766246, "loss": 1.2948, "theoretical_loss": 3.4689173108603564, "tokens_seen": 1752301568 }, { "epoch": 0.06, "learning_rate": 0.0009540898803750404, "loss": 1.3781, "theoretical_loss": 3.4688285236985954, "tokens_seen": 1752825856 }, { "epoch": 0.06, "objective/train/advantage_avg": 0.016374630853533745, "objective/train/docs_used": 992944, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.8299684524536133, "objective/train/original_loss": 2.829967737197876, "objective/train/theoretical_loss": 3.4687397705234693, "objective/train/tokens_used": 112209376, "objective/train/value_avg": -0.0274200439453125, "objective/train/value_loss": 0.0030080650467425585, "objective/train/value_max": -0.0012302398681640625, "objective/train/value_min": -0.392578125, "objective/train/value_reward_corr": 0.25957978614649824, "objective/train/value_std": 0.0185546875, "objective/train/weight_avg": 1.0016523599624634, "objective/train/weighted_lm_loss": 2.836483955383301, "objective/train/weights_max": 1.0222454071044922, "objective/train/weights_min": 0.9076098799705505, "theoretical_loss": 3.4687397705234693, "tokens_seen": 1753350144 }, { "epoch": 0.06, "learning_rate": 0.0009537665696734562, "loss": 1.3301, "theoretical_loss": 3.4687397705234693, "tokens_seen": 1753350144 }, { "epoch": 0.06, "learning_rate": 0.000953443258971872, "loss": 1.3424, "theoretical_loss": 3.468651051311811, "tokens_seen": 1753874432 }, { "epoch": 0.06, "learning_rate": 0.0009531199482702878, "loss": 1.3973, "theoretical_loss": 3.468562366040478, "tokens_seen": 1754398720 }, { "epoch": 0.06, "learning_rate": 0.0009527966375687035, "loss": 1.3266, "theoretical_loss": 3.468473714686348, "tokens_seen": 1754923008 }, { "epoch": 0.06, "objective/train/advantage_avg": 0.015252980403602123, "objective/train/docs_used": 993452, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.2693066596984863, "objective/train/original_loss": 2.2693064212799072, "objective/train/theoretical_loss": 3.4684626356507344, "objective/train/tokens_used": 113847776, "objective/train/value_avg": -0.02569580078125, "objective/train/value_loss": 0.00543864443898201, "objective/train/value_max": -0.0007643699645996094, "objective/train/value_min": -0.5888671875, "objective/train/value_reward_corr": 0.2999773533092841, "objective/train/value_std": 0.0225067138671875, "objective/train/weight_avg": 1.0015519857406616, "objective/train/weighted_lm_loss": 2.274026393890381, "objective/train/weights_max": 1.020256757736206, "objective/train/weights_min": 0.9070285558700562, "theoretical_loss": 3.4684626356507344, "tokens_seen": 1754988544 }, { "epoch": 0.06, "learning_rate": 0.0009524733268671193, "loss": 1.3169, "theoretical_loss": 3.4683850972263226, "tokens_seen": 1755447296 }, { "epoch": 0.06, "learning_rate": 0.0009521500161655351, "loss": 1.3063, "theoretical_loss": 3.468296513637326, "tokens_seen": 1755971584 }, { "epoch": 0.06, "learning_rate": 0.0009518267054639509, "loss": 1.3136, "theoretical_loss": 3.4682079638963055, "tokens_seen": 1756495872 }, { "epoch": 0.06, "objective/train/advantage_avg": 0.013180762529373169, "objective/train/docs_used": 994709, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.2248470783233643, "objective/train/original_loss": 2.2248470783233643, "objective/train/theoretical_loss": 3.468185831747097, "objective/train/tokens_used": 115486176, "objective/train/value_avg": -0.0307464599609375, "objective/train/value_loss": 0.006912854965776205, "objective/train/value_max": -0.0007700920104980469, "objective/train/value_min": -0.748046875, "objective/train/value_reward_corr": 0.4325782985228719, "objective/train/value_std": 0.03594970703125, "objective/train/weight_avg": 1.001352071762085, "objective/train/weighted_lm_loss": 2.2296142578125, "objective/train/weights_max": 1.0344078540802002, "objective/train/weights_min": 0.911996066570282, "theoretical_loss": 3.468185831747097, "tokens_seen": 1756626944 }, { "epoch": 0.06, "learning_rate": 0.0009515033947623667, "loss": 1.3142, "theoretical_loss": 3.46811944798023, "tokens_seen": 1757020160 }, { "epoch": 0.06, "learning_rate": 0.0009511800840607824, "loss": 1.2868, "theoretical_loss": 3.468030965866091, "tokens_seen": 1757544448 }, { "epoch": 0.06, "learning_rate": 0.0009508567733591982, "loss": 1.3019, "theoretical_loss": 3.4679425175309033, "tokens_seen": 1758068736 }, { "epoch": 0.06, "objective/train/advantage_avg": 0.0018833609065040946, "objective/train/docs_used": 995465, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.4297537803649902, "objective/train/original_loss": 2.429753541946411, "objective/train/theoretical_loss": 3.4679093581091562, "objective/train/tokens_used": 117124576, "objective/train/value_avg": -0.0300445556640625, "objective/train/value_loss": 0.005696803797036409, "objective/train/value_max": -0.0010280609130859375, "objective/train/value_min": -0.475830078125, "objective/train/value_reward_corr": 0.4188813229555936, "objective/train/value_std": 0.0311431884765625, "objective/train/weight_avg": 1.0002163648605347, "objective/train/weighted_lm_loss": 2.431326389312744, "objective/train/weights_max": 1.0242259502410889, "objective/train/weights_min": 0.9152461886405945, "theoretical_loss": 3.4679093581091562, "tokens_seen": 1758265344 }, { "epoch": 0.06, "learning_rate": 0.000950533462657614, "loss": 1.3194, "theoretical_loss": 3.4678541029517036, "tokens_seen": 1758593024 }, { "epoch": 0.06, "learning_rate": 0.0009502101519560297, "loss": 1.3169, "theoretical_loss": 3.4677657221055513, "tokens_seen": 1759117312 }, { "epoch": 0.06, "learning_rate": 0.0009498868412544456, "loss": 1.3362, "theoretical_loss": 3.4676773749695275, "tokens_seen": 1759641600 }, { "epoch": 0.06, "objective/train/advantage_avg": 0.007520793471485376, "objective/train/docs_used": 996601, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.6049442291259766, "objective/train/original_loss": 2.6049439907073975, "objective/train/theoretical_loss": 3.4676332140356583, "objective/train/tokens_used": 118762976, "objective/train/value_avg": -0.029541015625, "objective/train/value_loss": 0.0047663357108831406, "objective/train/value_max": -0.0011034011840820312, "objective/train/value_min": -0.308349609375, "objective/train/value_reward_corr": 0.5193194600121129, "objective/train/value_std": 0.031829833984375, "objective/train/weight_avg": 1.000775694847107, "objective/train/weighted_lm_loss": 2.6075937747955322, "objective/train/weights_max": 1.0235344171524048, "objective/train/weights_min": 0.938328742980957, "theoretical_loss": 3.4676332140356583, "tokens_seen": 1759903744 }, { "epoch": 0.06, "learning_rate": 0.0009495635305528612, "loss": 1.3269, "theoretical_loss": 3.4675890615207368, "tokens_seen": 1760165888 }, { "epoch": 0.06, "learning_rate": 0.0009492402198512771, "loss": 1.3569, "theoretical_loss": 3.4675007817363057, "tokens_seen": 1760690176 }, { "epoch": 0.06, "learning_rate": 0.0009489169091496929, "loss": 1.3502, "theoretical_loss": 3.4674125355933825, "tokens_seen": 1761214464 }, { "epoch": 0.06, "objective/train/advantage_avg": 0.017532380297780037, "objective/train/docs_used": 997243, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.6635971069335938, "objective/train/original_loss": 2.6635971069335938, "objective/train/theoretical_loss": 3.467357398827489, "objective/train/tokens_used": 120401376, "objective/train/value_avg": -0.025634765625, "objective/train/value_loss": 0.0019115842878818512, "objective/train/value_max": -0.000690460205078125, "objective/train/value_min": -0.2166748046875, "objective/train/value_reward_corr": 0.20897879389730797, "objective/train/value_std": 0.0188751220703125, "objective/train/weight_avg": 1.0017626285552979, "objective/train/weighted_lm_loss": 2.6702544689178467, "objective/train/weights_max": 1.0192686319351196, "objective/train/weights_min": 0.9302273988723755, "theoretical_loss": 3.467357398827489, "tokens_seen": 1761542144 }, { "epoch": 0.06, "learning_rate": 0.0009485935984481086, "loss": 1.3322, "theoretical_loss": 3.467324323069139, "tokens_seen": 1761738752 }, { "epoch": 0.06, "learning_rate": 0.0009482702877465245, "loss": 1.3185, "theoretical_loss": 3.4672361441407675, "tokens_seen": 1762263040 }, { "epoch": 0.06, "learning_rate": 0.0009479469770449403, "loss": 1.3561, "theoretical_loss": 3.4671479987854847, "tokens_seen": 1762787328 }, { "epoch": 0.06, "objective/train/advantage_avg": 0.015057829208672047, "objective/train/docs_used": 998400, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.753380298614502, "objective/train/original_loss": 2.7533798217773438, "objective/train/theoretical_loss": 3.467081911787668, "objective/train/tokens_used": 122039776, "objective/train/value_avg": -0.0203399658203125, "objective/train/value_loss": 0.0008299863548018038, "objective/train/value_max": -0.0009927749633789062, "objective/train/value_min": -0.128662109375, "objective/train/value_reward_corr": 0.025133317086062503, "objective/train/value_std": 0.01204681396484375, "objective/train/weight_avg": 1.0015099048614502, "objective/train/weighted_lm_loss": 2.7592740058898926, "objective/train/weights_max": 1.0128883123397827, "objective/train/weights_min": 0.9715917706489563, "theoretical_loss": 3.467081911787668, "tokens_seen": 1763180544 }, { "epoch": 0.06, "learning_rate": 0.0009476236663433559, "loss": 1.3484, "theoretical_loss": 3.467059886980528, "tokens_seen": 1763311616 }, { "epoch": 0.06, "learning_rate": 0.0009473003556417718, "loss": 1.3323, "theoretical_loss": 3.4669718087031574, "tokens_seen": 1763835904 }, { "epoch": 0.06, "learning_rate": 0.0009469770449401875, "loss": 1.3655, "theoretical_loss": 3.466883763930655, "tokens_seen": 1764360192 }, { "epoch": 0.06, "objective/train/advantage_avg": 0.016653353348374367, "objective/train/docs_used": 998902, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.93595814704895, "objective/train/original_loss": 2.935957908630371, "objective/train/theoretical_loss": 3.4668067522213355, "objective/train/tokens_used": 123678176, "objective/train/value_avg": -0.0299530029296875, "objective/train/value_loss": 0.0028108160477131605, "objective/train/value_max": -0.0010442733764648438, "objective/train/value_min": -0.51025390625, "objective/train/value_reward_corr": 0.359808899889377, "objective/train/value_std": 0.024627685546875, "objective/train/weight_avg": 1.0016794204711914, "objective/train/weighted_lm_loss": 2.94246244430542, "objective/train/weights_max": 1.0307047367095947, "objective/train/weights_min": 0.9174464344978333, "theoretical_loss": 3.4668067522213355, "tokens_seen": 1764818944 }, { "epoch": 0.06, "learning_rate": 0.0009466537342386034, "loss": 1.356, "theoretical_loss": 3.4667957526403255, "tokens_seen": 1764884480 }, { "epoch": 0.06, "learning_rate": 0.0009463304235370192, "loss": 1.3981, "theoretical_loss": 3.4667077748094948, "tokens_seen": 1765408768 }, { "epoch": 0.06, "learning_rate": 0.0009460071128354348, "loss": 1.3418, "theoretical_loss": 3.466619830415512, "tokens_seen": 1765933056 }, { "epoch": 0.06, "objective/train/advantage_avg": 0.007814303040504456, "objective/train/docs_used": 999561, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.4640517234802246, "objective/train/original_loss": 2.4640512466430664, "objective/train/theoretical_loss": 3.4665319194357473, "objective/train/tokens_used": 125316576, "objective/train/value_avg": -0.029022216796875, "objective/train/value_loss": 0.005800583865493536, "objective/train/value_max": -0.0008864402770996094, "objective/train/value_min": -0.48583984375, "objective/train/value_reward_corr": 0.3496722797738707, "objective/train/value_std": 0.0272674560546875, "objective/train/weight_avg": 1.000809907913208, "objective/train/weighted_lm_loss": 2.468526601791382, "objective/train/weights_max": 1.0472798347473145, "objective/train/weights_min": 0.9072762131690979, "theoretical_loss": 3.4665319194357473, "tokens_seen": 1766457344 }, { "epoch": 0.06, "learning_rate": 0.0009456838021338507, "loss": 1.3559, "theoretical_loss": 3.4665319194357473, "tokens_seen": 1766457344 }, { "epoch": 0.06, "learning_rate": 0.0009453604914322664, "loss": 1.3633, "theoretical_loss": 3.4664440418475935, "tokens_seen": 1766981632 }, { "epoch": 0.06, "learning_rate": 0.0009450371807306822, "loss": 1.3611, "theoretical_loss": 3.466356197628465, "tokens_seen": 1767505920 }, { "epoch": 0.06, "learning_rate": 0.000944713870029098, "loss": 1.2738, "theoretical_loss": 3.4662683867557984, "tokens_seen": 1768030208 }, { "epoch": 0.06, "objective/train/advantage_avg": 0.0108227813616395, "objective/train/docs_used": 1000589, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.83927059173584, "objective/train/original_loss": 2.8392703533172607, "objective/train/theoretical_loss": 3.4662574127402657, "objective/train/tokens_used": 126954976, "objective/train/value_avg": -0.0239410400390625, "objective/train/value_loss": 0.0028038048185408115, "objective/train/value_max": -0.000904083251953125, "objective/train/value_min": -0.204345703125, "objective/train/value_reward_corr": 0.07982110019527905, "objective/train/value_std": 0.0159454345703125, "objective/train/weight_avg": 1.0010961294174194, "objective/train/weighted_lm_loss": 2.844484567642212, "objective/train/weights_max": 1.0205787420272827, "objective/train/weights_min": 0.9196072220802307, "theoretical_loss": 3.4662574127402657, "tokens_seen": 1768095744 }, { "epoch": 0.07, "learning_rate": 0.0009443905593275137, "loss": 1.3244, "theoretical_loss": 3.466180609207053, "tokens_seen": 1768554496 }, { "epoch": 0.07, "learning_rate": 0.0009440672486259296, "loss": 1.3616, "theoretical_loss": 3.466092864959708, "tokens_seen": 1769078784 }, { "epoch": 0.07, "learning_rate": 0.0009437439379243453, "loss": 1.3597, "theoretical_loss": 3.4660051539912664, "tokens_seen": 1769603072 }, { "epoch": 0.07, "objective/train/advantage_avg": 0.013167756609618664, "objective/train/docs_used": 1001288, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.946078062057495, "objective/train/original_loss": 2.946077823638916, "objective/train/theoretical_loss": 3.4659832314463506, "objective/train/tokens_used": 128593376, "objective/train/value_avg": -0.0240936279296875, "objective/train/value_loss": 0.004623904824256897, "objective/train/value_max": -0.0011835098266601562, "objective/train/value_min": -0.25341796875, "objective/train/value_reward_corr": 0.14244312383724372, "objective/train/value_std": 0.0199737548828125, "objective/train/weight_avg": 1.0013395547866821, "objective/train/weighted_lm_loss": 2.9514763355255127, "objective/train/weights_max": 1.0210210084915161, "objective/train/weights_min": 0.9093143343925476, "theoretical_loss": 3.4659832314463506, "tokens_seen": 1769734144 }, { "epoch": 0.07, "learning_rate": 0.0009434206272227611, "loss": 1.3506, "theoretical_loss": 3.4659174762792526, "tokens_seen": 1770127360 }, { "epoch": 0.07, "learning_rate": 0.0009430973165211769, "loss": 1.3678, "theoretical_loss": 3.4658298318012117, "tokens_seen": 1770651648 }, { "epoch": 0.07, "learning_rate": 0.0009427740058195926, "loss": 1.3573, "theoretical_loss": 3.465742220534713, "tokens_seen": 1771175936 }, { "epoch": 0.07, "objective/train/advantage_avg": 0.017762089148163795, "objective/train/docs_used": 1002447, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.7887346744537354, "objective/train/original_loss": 2.7887349128723145, "objective/train/theoretical_loss": 3.465709374867551, "objective/train/tokens_used": 130231776, "objective/train/value_avg": -0.025970458984375, "objective/train/value_loss": 0.00224806135520339, "objective/train/value_max": -0.0015974044799804688, "objective/train/value_min": -0.324462890625, "objective/train/value_reward_corr": 0.10196264911090511, "objective/train/value_std": 0.017578125, "objective/train/weight_avg": 1.0017874240875244, "objective/train/weighted_lm_loss": 2.794950246810913, "objective/train/weights_max": 1.0159268379211426, "objective/train/weights_min": 0.9274219870567322, "theoretical_loss": 3.465709374867551, "tokens_seen": 1771372544 }, { "epoch": 0.07, "learning_rate": 0.0009424506951180085, "loss": 1.304, "theoretical_loss": 3.465654642457344, "tokens_seen": 1771700224 }, { "epoch": 0.07, "learning_rate": 0.0009421273844164242, "loss": 1.3558, "theoretical_loss": 3.4655670975467183, "tokens_seen": 1772224512 }, { "epoch": 0.07, "learning_rate": 0.00094180407371484, "loss": 1.3464, "theoretical_loss": 3.465479585780467, "tokens_seen": 1772748800 }, { "epoch": 0.07, "objective/train/advantage_avg": 0.016024593263864517, "objective/train/docs_used": 1002969, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 3.0770609378814697, "objective/train/original_loss": 3.0770606994628906, "objective/train/theoretical_loss": 3.465435842319499, "objective/train/tokens_used": 131870176, "objective/train/value_avg": -0.0300445556640625, "objective/train/value_loss": 0.00524939363822341, "objective/train/value_max": -0.0010728836059570312, "objective/train/value_min": -0.66650390625, "objective/train/value_reward_corr": 0.22482529947299068, "objective/train/value_std": 0.022491455078125, "objective/train/weight_avg": 1.0016282796859741, "objective/train/weighted_lm_loss": 3.0836069583892822, "objective/train/weights_max": 1.0197124481201172, "objective/train/weights_min": 0.9088911414146423, "theoretical_loss": 3.465435842319499, "tokens_seen": 1773010944 }, { "epoch": 0.07, "learning_rate": 0.0009414807630132558, "loss": 1.3633, "theoretical_loss": 3.465392107136246, "tokens_seen": 1773273088 }, { "epoch": 0.07, "learning_rate": 0.0009411574523116715, "loss": 1.3182, "theoretical_loss": 3.465304661591732, "tokens_seen": 1773797376 }, { "epoch": 0.07, "learning_rate": 0.0009408341416100872, "loss": 1.3347, "theoretical_loss": 3.4652172491246223, "tokens_seen": 1774321664 }, { "epoch": 0.07, "objective/train/advantage_avg": 0.019795317202806473, "objective/train/docs_used": 1004180, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.6909523010253906, "objective/train/original_loss": 2.6909523010253906, "objective/train/theoretical_loss": 3.465162633119897, "objective/train/tokens_used": 133508576, "objective/train/value_avg": -0.031951904296875, "objective/train/value_loss": 0.0025198645889759064, "objective/train/value_max": -0.001361846923828125, "objective/train/value_min": -0.479736328125, "objective/train/value_reward_corr": 0.297478175835225, "objective/train/value_std": 0.02911376953125, "objective/train/weight_avg": 1.0019919872283936, "objective/train/weighted_lm_loss": 2.6979353427886963, "objective/train/weights_max": 1.0352239608764648, "objective/train/weights_min": 0.9367367029190063, "theoretical_loss": 3.465162633119897, "tokens_seen": 1774649344 }, { "epoch": 0.07, "learning_rate": 0.0009405108309085031, "loss": 1.3084, "theoretical_loss": 3.4651298697126363, "tokens_seen": 1774845952 }, { "epoch": 0.07, "learning_rate": 0.0009401875202069189, "loss": 1.338, "theoretical_loss": 3.4650425233335156, "tokens_seen": 1775370240 }, { "epoch": 0.07, "learning_rate": 0.0009398642095053347, "loss": 1.3048, "theoretical_loss": 3.464955209965024, "tokens_seen": 1775894528 }, { "epoch": 0.07, "objective/train/advantage_avg": 0.015902899205684662, "objective/train/docs_used": 1004852, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.7525441646575928, "objective/train/original_loss": 2.7525439262390137, "objective/train/theoretical_loss": 3.464889746588515, "objective/train/tokens_used": 135146976, "objective/train/value_avg": -0.0273284912109375, "objective/train/value_loss": 0.0035540140233933926, "objective/train/value_max": -0.0022792816162109375, "objective/train/value_min": -0.425048828125, "objective/train/value_reward_corr": 0.1848296751518394, "objective/train/value_std": 0.0198516845703125, "objective/train/weight_avg": 1.0016077756881714, "objective/train/weighted_lm_loss": 2.7578306198120117, "objective/train/weights_max": 1.0167765617370605, "objective/train/weights_min": 0.9156023263931274, "theoretical_loss": 3.464889746588515, "tokens_seen": 1776287744 }, { "epoch": 0.07, "learning_rate": 0.0009395408988037504, "loss": 1.3236, "theoretical_loss": 3.464867929584944, "tokens_seen": 1776418816 }, { "epoch": 0.07, "learning_rate": 0.0009392175881021661, "loss": 1.3581, "theoretical_loss": 3.4647806821710834, "tokens_seen": 1776943104 }, { "epoch": 0.07, "learning_rate": 0.000938894277400582, "loss": 1.3597, "theoretical_loss": 3.4646934677012675, "tokens_seen": 1777467392 }, { "epoch": 0.07, "objective/train/advantage_avg": 0.01112272683531046, "objective/train/docs_used": 1006253, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.6933717727661133, "objective/train/original_loss": 2.693370819091797, "objective/train/theoretical_loss": 3.4646171820471774, "objective/train/tokens_used": 136785376, "objective/train/value_avg": -0.03460693359375, "objective/train/value_loss": 0.007106421049684286, "objective/train/value_max": -0.0017824172973632812, "objective/train/value_min": -0.7919921875, "objective/train/value_reward_corr": 0.35903249971928225, "objective/train/value_std": 0.036590576171875, "objective/train/weight_avg": 1.0011472702026367, "objective/train/weighted_lm_loss": 2.69800066947937, "objective/train/weights_max": 1.0379207134246826, "objective/train/weights_min": 0.909112811088562, "theoretical_loss": 3.4646171820471774, "tokens_seen": 1777926144 }, { "epoch": 0.07, "learning_rate": 0.0009385709666989978, "loss": 1.3448, "theoretical_loss": 3.4646062861533466, "tokens_seen": 1777991680 }, { "epoch": 0.07, "learning_rate": 0.0009382476559974135, "loss": 1.3252, "theoretical_loss": 3.4645191375051905, "tokens_seen": 1778515968 }, { "epoch": 0.07, "learning_rate": 0.0009379243452958293, "loss": 1.3179, "theoretical_loss": 3.46443202173469, "tokens_seen": 1779040256 }, { "epoch": 0.07, "objective/train/advantage_avg": 0.011692299507558346, "objective/train/docs_used": 1006973, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.629805326461792, "objective/train/original_loss": 2.62980580329895, "objective/train/theoretical_loss": 3.4643449388197594, "objective/train/tokens_used": 138423776, "objective/train/value_avg": -0.03253173828125, "objective/train/value_loss": 0.00714264577254653, "objective/train/value_max": -0.0006961822509765625, "objective/train/value_min": -0.9306640625, "objective/train/value_reward_corr": 0.48646543082602633, "objective/train/value_std": 0.04345703125, "objective/train/weight_avg": 1.0012043714523315, "objective/train/weighted_lm_loss": 2.6335527896881104, "objective/train/weights_max": 1.0600122213363647, "objective/train/weights_min": 0.9059730768203735, "theoretical_loss": 3.4643449388197594, "tokens_seen": 1779564544 }, { "epoch": 0.07, "learning_rate": 0.000937601034594245, "loss": 1.3683, "theoretical_loss": 3.4643449388197594, "tokens_seen": 1779564544 }, { "epoch": 0.07, "learning_rate": 0.0009372777238926609, "loss": 1.3642, "theoretical_loss": 3.4642578887383317, "tokens_seen": 1780088832 }, { "epoch": 0.07, "learning_rate": 0.0009369544131910767, "loss": 1.372, "theoretical_loss": 3.464170871468363, "tokens_seen": 1780613120 }, { "epoch": 0.07, "learning_rate": 0.0009366311024894924, "loss": 1.3158, "theoretical_loss": 3.4640838869878303, "tokens_seen": 1781137408 }, { "epoch": 0.07, "objective/train/advantage_avg": 0.018312735483050346, "objective/train/docs_used": 1008497, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.8306729793548584, "objective/train/original_loss": 2.8306729793548584, "objective/train/theoretical_loss": 3.4640730162321747, "objective/train/tokens_used": 140062176, "objective/train/value_avg": -0.030670166015625, "objective/train/value_loss": 0.0038036066107451916, "objective/train/value_max": -0.0014324188232421875, "objective/train/value_min": -0.30322265625, "objective/train/value_reward_corr": 0.42490119851596214, "objective/train/value_std": 0.0286712646484375, "objective/train/weight_avg": 1.0018501281738281, "objective/train/weighted_lm_loss": 2.837808609008789, "objective/train/weights_max": 1.0246018171310425, "objective/train/weights_min": 0.9281264543533325, "theoretical_loss": 3.4640730162321747, "tokens_seen": 1781202944 }, { "epoch": 0.07, "learning_rate": 0.0009363077917879082, "loss": 1.3424, "theoretical_loss": 3.4639969352747317, "tokens_seen": 1781661696 }, { "epoch": 0.07, "learning_rate": 0.0009359844810863239, "loss": 1.3617, "theoretical_loss": 3.4639100163070866, "tokens_seen": 1782185984 }, { "epoch": 0.07, "learning_rate": 0.0009356611703847397, "loss": 1.3523, "theoretical_loss": 3.463823130062935, "tokens_seen": 1782710272 }, { "epoch": 0.07, "objective/train/advantage_avg": 0.01626390404999256, "objective/train/docs_used": 1009251, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.5526881217956543, "objective/train/original_loss": 2.5526883602142334, "objective/train/theoretical_loss": 3.463801413612372, "objective/train/tokens_used": 141700576, "objective/train/value_avg": -0.0271453857421875, "objective/train/value_loss": 0.0022771048825234175, "objective/train/value_max": -0.0008106231689453125, "objective/train/value_min": -0.284912109375, "objective/train/value_reward_corr": 0.3217005471972202, "objective/train/value_std": 0.0252227783203125, "objective/train/weight_avg": 1.001637578010559, "objective/train/weighted_lm_loss": 2.5583765506744385, "objective/train/weights_max": 1.027501106262207, "objective/train/weights_min": 0.9381319284439087, "theoretical_loss": 3.463801413612372, "tokens_seen": 1782841344 }, { "epoch": 0.07, "learning_rate": 0.0009353378596831556, "loss": 1.3158, "theoretical_loss": 3.463736276520339, "tokens_seen": 1783234560 }, { "epoch": 0.07, "learning_rate": 0.0009350145489815713, "loss": 1.3343, "theoretical_loss": 3.4636494556573822, "tokens_seen": 1783758848 }, { "epoch": 0.07, "learning_rate": 0.0009346912382799871, "loss": 1.3255, "theoretical_loss": 3.4635626674521682, "tokens_seen": 1784283136 }, { "epoch": 0.07, "objective/train/advantage_avg": 0.017123175784945488, "objective/train/docs_used": 1010356, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.880633592605591, "objective/train/original_loss": 2.88063383102417, "objective/train/theoretical_loss": 3.4635301302903216, "objective/train/tokens_used": 143338976, "objective/train/value_avg": -0.026123046875, "objective/train/value_loss": 0.0040292865596711636, "objective/train/value_max": -0.001239776611328125, "objective/train/value_min": -0.302001953125, "objective/train/value_reward_corr": 0.1765753188411494, "objective/train/value_std": 0.0185394287109375, "objective/train/weight_avg": 1.0017321109771729, "objective/train/weighted_lm_loss": 2.886716842651367, "objective/train/weights_max": 1.0185389518737793, "objective/train/weights_min": 0.9091359972953796, "theoretical_loss": 3.4635301302903216, "tokens_seen": 1784479744 }, { "epoch": 0.08, "learning_rate": 0.0009343679275784028, "loss": 1.3465, "theoretical_loss": 3.4634759118828216, "tokens_seen": 1784807424 }, { "epoch": 0.08, "learning_rate": 0.0009340446168768186, "loss": 1.3484, "theoretical_loss": 3.463389188927489, "tokens_seen": 1785331712 }, { "epoch": 0.08, "learning_rate": 0.0009337213061752345, "loss": 1.3087, "theoretical_loss": 3.463302498564338, "tokens_seen": 1785856000 }, { "epoch": 0.08, "objective/train/advantage_avg": 0.013772552832961082, "objective/train/docs_used": 1010805, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.8227343559265137, "objective/train/original_loss": 2.8227343559265137, "objective/train/theoretical_loss": 3.4632591655980134, "objective/train/tokens_used": 144977376, "objective/train/value_avg": -0.03558349609375, "objective/train/value_loss": 0.006259028799831867, "objective/train/value_max": -0.0017414093017578125, "objective/train/value_min": -0.420166015625, "objective/train/value_reward_corr": 0.5337192020473854, "objective/train/value_std": 0.048583984375, "objective/train/weight_avg": 1.0014082193374634, "objective/train/weighted_lm_loss": 2.8276538848876953, "objective/train/weights_max": 1.0308732986450195, "objective/train/weights_min": 0.9202226400375366, "theoretical_loss": 3.4632591655980134, "tokens_seen": 1786118144 }, { "epoch": 0.08, "learning_rate": 0.0009333979954736502, "loss": 1.3304, "theoretical_loss": 3.463215840771556, "tokens_seen": 1786380288 }, { "epoch": 0.08, "learning_rate": 0.0009330746847720661, "loss": 1.3135, "theoretical_loss": 3.4631292155273528, "tokens_seen": 1786904576 }, { "epoch": 0.08, "learning_rate": 0.0009327513740704817, "loss": 1.3629, "theoretical_loss": 3.463042622809959, "tokens_seen": 1787428864 }, { "epoch": 0.08, "objective/train/advantage_avg": 0.013923184014856815, "objective/train/docs_used": 1011887, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 3.6959261894226074, "objective/train/original_loss": 3.6959269046783447, "objective/train/theoretical_loss": 3.4629885188694427, "objective/train/tokens_used": 146615776, "objective/train/value_avg": -0.0374755859375, "objective/train/value_loss": 0.006140228360891342, "objective/train/value_max": -0.0007915496826171875, "objective/train/value_min": -0.8310546875, "objective/train/value_reward_corr": 0.5016135169982936, "objective/train/value_std": 0.04083251953125, "objective/train/weight_avg": 1.001422643661499, "objective/train/weighted_lm_loss": 3.7022182941436768, "objective/train/weights_max": 1.0523158311843872, "objective/train/weights_min": 0.909868597984314, "theoretical_loss": 3.4629885188694427, "tokens_seen": 1787756544 }, { "epoch": 0.08, "learning_rate": 0.0009324280633688975, "loss": 1.3682, "theoretical_loss": 3.462956062597625, "tokens_seen": 1787953152 }, { "epoch": 0.08, "learning_rate": 0.0009321047526673134, "loss": 1.3456, "theoretical_loss": 3.462869534868623, "tokens_seen": 1788477440 }, { "epoch": 0.08, "learning_rate": 0.0009317814419657291, "loss": 1.3409, "theoretical_loss": 3.4627830396012467, "tokens_seen": 1789001728 }, { "epoch": 0.08, "objective/train/advantage_avg": 0.01144155953079462, "objective/train/docs_used": 1012559, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.5609939098358154, "objective/train/original_loss": 2.5609939098358154, "objective/train/theoretical_loss": 3.4627181894406096, "objective/train/tokens_used": 148254176, "objective/train/value_avg": -0.020904541015625, "objective/train/value_loss": 0.002107446314767003, "objective/train/value_max": -0.00042057037353515625, "objective/train/value_min": -0.42724609375, "objective/train/value_reward_corr": 0.060359174994928065, "objective/train/value_std": 0.016387939453125, "objective/train/weight_avg": 1.0011545419692993, "objective/train/weighted_lm_loss": 2.565641164779663, "objective/train/weights_max": 1.0327448844909668, "objective/train/weights_min": 0.9387598037719727, "theoretical_loss": 3.4627181894406096, "tokens_seen": 1789394944 }, { "epoch": 0.08, "learning_rate": 0.0009314581312641449, "loss": 1.3583, "theoretical_loss": 3.4626965767738094, "tokens_seen": 1789526016 }, { "epoch": 0.08, "learning_rate": 0.0009311348205625606, "loss": 1.3637, "theoretical_loss": 3.4626101463646455, "tokens_seen": 1790050304 }, { "epoch": 0.08, "learning_rate": 0.0009308115098609764, "loss": 1.3993, "theoretical_loss": 3.462523748352111, "tokens_seen": 1790574592 }, { "epoch": 0.08, "objective/train/advantage_avg": 0.01727796345949173, "objective/train/docs_used": 1013169, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 3.2217695713043213, "objective/train/original_loss": 3.221769332885742, "objective/train/theoretical_loss": 3.462448176649503, "objective/train/tokens_used": 149892576, "objective/train/value_avg": -0.031524658203125, "objective/train/value_loss": 0.0036083287559449673, "objective/train/value_max": -0.002216339111328125, "objective/train/value_min": -0.48974609375, "objective/train/value_reward_corr": 0.39688783395559624, "objective/train/value_std": 0.026153564453125, "objective/train/weight_avg": 1.001745581626892, "objective/train/weighted_lm_loss": 3.228137493133545, "objective/train/weights_max": 1.0432844161987305, "objective/train/weights_min": 0.9286147952079773, "theoretical_loss": 3.462448176649503, "tokens_seen": 1791033344 }, { "epoch": 0.08, "learning_rate": 0.0009304881991593923, "loss": 1.322, "theoretical_loss": 3.462437382714582, "tokens_seen": 1791098880 }, { "epoch": 0.08, "learning_rate": 0.000930164888457808, "loss": 1.3564, "theoretical_loss": 3.4623510494304552, "tokens_seen": 1791623168 }, { "epoch": 0.08, "learning_rate": 0.0009298415777562238, "loss": 1.3314, "theoretical_loss": 3.462264748478149, "tokens_seen": 1792147456 }, { "epoch": 0.08, "objective/train/advantage_avg": 0.015472723171114922, "objective/train/docs_used": 1014342, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.797625780105591, "objective/train/original_loss": 2.797625780105591, "objective/train/theoretical_loss": 3.4621784798361013, "objective/train/tokens_used": 151530976, "objective/train/value_avg": -0.0281524658203125, "objective/train/value_loss": 0.004299018066376448, "objective/train/value_max": -0.0009698867797851562, "objective/train/value_min": -0.326904296875, "objective/train/value_reward_corr": 0.17791231508715483, "objective/train/value_std": 0.0252532958984375, "objective/train/weight_avg": 1.0015684366226196, "objective/train/weighted_lm_loss": 2.8032617568969727, "objective/train/weights_max": 1.0305237770080566, "objective/train/weights_min": 0.9194014072418213, "theoretical_loss": 3.4621784798361013, "tokens_seen": 1792671744 }, { "epoch": 0.08, "learning_rate": 0.0009295182670546395, "loss": 1.3296, "theoretical_loss": 3.4621784798361013, "tokens_seen": 1792671744 }, { "epoch": 0.08, "learning_rate": 0.0009291949563530553, "loss": 1.352, "theoretical_loss": 3.462092243482771, "tokens_seen": 1793196032 }, { "epoch": 0.08, "learning_rate": 0.000928871645651471, "loss": 1.327, "theoretical_loss": 3.4620060393966385, "tokens_seen": 1793720320 }, { "epoch": 0.08, "learning_rate": 0.0009285483349498869, "loss": 1.3478, "theoretical_loss": 3.4619198675562046, "tokens_seen": 1794244608 }, { "epoch": 0.08, "objective/train/advantage_avg": 0.017922066152095795, "objective/train/docs_used": 1015045, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.8019073009490967, "objective/train/original_loss": 2.8019070625305176, "objective/train/theoretical_loss": 3.461909098342356, "objective/train/tokens_used": 153169376, "objective/train/value_avg": -0.045166015625, "objective/train/value_loss": 0.011594807729125023, "objective/train/value_max": -0.0006642341613769531, "objective/train/value_min": -0.9755859375, "objective/train/value_reward_corr": 0.4625835020636822, "objective/train/value_std": 0.0657958984375, "objective/train/weight_avg": 1.0018492937088013, "objective/train/weighted_lm_loss": 2.8070921897888184, "objective/train/weights_max": 1.0693342685699463, "objective/train/weights_min": 0.9087886810302734, "theoretical_loss": 3.461909098342356, "tokens_seen": 1794310144 }, { "epoch": 0.08, "learning_rate": 0.0009282250242483027, "loss": 1.3554, "theoretical_loss": 3.4618337279399887, "tokens_seen": 1794768896 }, { "epoch": 0.08, "learning_rate": 0.0009279017135467184, "loss": 1.3384, "theoretical_loss": 3.461747620526534, "tokens_seen": 1795293184 }, { "epoch": 0.08, "learning_rate": 0.0009275784028451342, "loss": 1.2953, "theoretical_loss": 3.461661545294402, "tokens_seen": 1795817472 }, { "epoch": 0.08, "objective/train/advantage_avg": 0.009509465657174587, "objective/train/docs_used": 1016473, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.858431339263916, "objective/train/original_loss": 2.858431339263916, "objective/train/theoretical_loss": 3.4616400315121902, "objective/train/tokens_used": 154807776, "objective/train/value_avg": -0.02734375, "objective/train/value_loss": 0.0034070294350385666, "objective/train/value_max": -0.0012063980102539062, "objective/train/value_min": -0.74072265625, "objective/train/value_reward_corr": 0.5220012620026281, "objective/train/value_std": 0.0247650146484375, "objective/train/weight_avg": 1.0009677410125732, "objective/train/weighted_lm_loss": 2.8618812561035156, "objective/train/weights_max": 1.0193452835083008, "objective/train/weights_min": 0.9078107476234436, "theoretical_loss": 3.4616400315121902, "tokens_seen": 1795948544 }, { "epoch": 0.08, "learning_rate": 0.0009272550921435499, "loss": 1.3128, "theoretical_loss": 3.4615755022221757, "tokens_seen": 1796341760 }, { "epoch": 0.08, "learning_rate": 0.0009269317814419658, "loss": 1.3157, "theoretical_loss": 3.4614894912884577, "tokens_seen": 1796866048 }, { "epoch": 0.08, "learning_rate": 0.0009266084707403816, "loss": 1.327, "theoretical_loss": 3.461403512471872, "tokens_seen": 1797390336 }, { "epoch": 0.08, "objective/train/advantage_avg": 0.01209726370871067, "objective/train/docs_used": 1017203, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 3.0119988918304443, "objective/train/original_loss": 3.0119986534118652, "objective/train/theoretical_loss": 3.461371278691491, "objective/train/tokens_used": 156446176, "objective/train/value_avg": -0.0294342041015625, "objective/train/value_loss": 0.0070980265736579895, "objective/train/value_max": -0.0009145736694335938, "objective/train/value_min": -0.90673828125, "objective/train/value_reward_corr": 0.5035739628498457, "objective/train/value_std": 0.040191650390625, "objective/train/weight_avg": 1.0012445449829102, "objective/train/weighted_lm_loss": 3.0164411067962646, "objective/train/weights_max": 1.0661208629608154, "objective/train/weights_min": 0.9108452796936035, "theoretical_loss": 3.461371278691491, "tokens_seen": 1797586944 }, { "epoch": 0.08, "learning_rate": 0.0009262851600387972, "loss": 1.3781, "theoretical_loss": 3.4613175657510626, "tokens_seen": 1797914624 }, { "epoch": 0.08, "learning_rate": 0.0009259618493372131, "loss": 1.3458, "theoretical_loss": 3.461231651104694, "tokens_seen": 1798438912 }, { "epoch": 0.08, "learning_rate": 0.0009256385386356288, "loss": 1.3388, "theoretical_loss": 3.461145768511451, "tokens_seen": 1798963200 }, { "epoch": 0.08, "objective/train/advantage_avg": 0.01165814884006977, "objective/train/docs_used": 1018428, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 3.1016581058502197, "objective/train/original_loss": 3.1016578674316406, "objective/train/theoretical_loss": 3.4611028392280967, "objective/train/tokens_used": 158084576, "objective/train/value_avg": -0.0291900634765625, "objective/train/value_loss": 0.006055927835404873, "objective/train/value_max": -0.0006799697875976562, "objective/train/value_min": -0.80615234375, "objective/train/value_reward_corr": 0.40706039341135664, "objective/train/value_std": 0.0301666259765625, "objective/train/weight_avg": 1.0011956691741943, "objective/train/weighted_lm_loss": 3.106729030609131, "objective/train/weights_max": 1.0370866060256958, "objective/train/weights_min": 0.909893274307251, "theoretical_loss": 3.4611028392280967, "tokens_seen": 1799225344 }, { "epoch": 0.08, "learning_rate": 0.0009253152279340447, "loss": 1.3101, "theoretical_loss": 3.461059917950039, "tokens_seen": 1799487488 }, { "epoch": 0.08, "learning_rate": 0.0009249919172324605, "loss": 1.3278, "theoretical_loss": 3.460974099399184, "tokens_seen": 1800011776 }, { "epoch": 0.08, "learning_rate": 0.0009246686065308761, "loss": 1.3387, "theoretical_loss": 3.460888312837632, "tokens_seen": 1800536064 }, { "epoch": 0.08, "objective/train/advantage_avg": 0.016710370779037476, "objective/train/docs_used": 1018922, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.9108660221099854, "objective/train/original_loss": 2.9108662605285645, "objective/train/theoretical_loss": 3.4608347124717938, "objective/train/tokens_used": 159722976, "objective/train/value_avg": -0.0294952392578125, "objective/train/value_loss": 0.001418989966623485, "objective/train/value_max": -0.0013408660888671875, "objective/train/value_min": -0.483154296875, "objective/train/value_reward_corr": 0.4158590744920556, "objective/train/value_std": 0.0263671875, "objective/train/weight_avg": 1.001678228378296, "objective/train/weighted_lm_loss": 2.917433261871338, "objective/train/weights_max": 1.0475298166275024, "objective/train/weights_min": 0.9148980975151062, "theoretical_loss": 3.4608347124717938, "tokens_seen": 1800863744 }, { "epoch": 0.09, "learning_rate": 0.000924345295829292, "loss": 1.3601, "theoretical_loss": 3.4608025582441484, "tokens_seen": 1801060352 }, { "epoch": 0.09, "learning_rate": 0.0009240219851277077, "loss": 1.3437, "theoretical_loss": 3.4607168355975197, "tokens_seen": 1801584640 }, { "epoch": 0.09, "learning_rate": 0.0009236986744261236, "loss": 1.3216, "theoretical_loss": 3.460631144876554, "tokens_seen": 1802108928 }, { "epoch": 0.09, "objective/train/advantage_avg": 0.010354031808674335, "objective/train/docs_used": 1019593, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.34501576423645, "objective/train/original_loss": 2.34501576423645, "objective/train/theoretical_loss": 3.4605668977743074, "objective/train/tokens_used": 161361376, "objective/train/value_avg": -0.021148681640625, "objective/train/value_loss": 0.0009800423868000507, "objective/train/value_max": -0.0008425712585449219, "objective/train/value_min": -0.354736328125, "objective/train/value_reward_corr": 0.11820115601817677, "objective/train/value_std": 0.01520538330078125, "objective/train/weight_avg": 1.0010403394699097, "objective/train/weighted_lm_loss": 2.34946608543396, "objective/train/weights_max": 1.0325263738632202, "objective/train/weights_min": 0.9796761274337769, "theoretical_loss": 3.4605668977743074, "tokens_seen": 1802502144 }, { "epoch": 0.09, "learning_rate": 0.0009233753637245394, "loss": 1.3203, "theoretical_loss": 3.4605454860600773, "tokens_seen": 1802633216 }, { "epoch": 0.09, "learning_rate": 0.000923052053022955, "loss": 1.3608, "theoretical_loss": 3.4604598591269364, "tokens_seen": 1803157504 }, { "epoch": 0.09, "learning_rate": 0.0009227287423213709, "loss": 1.3525, "theoretical_loss": 3.4603742640559996, "tokens_seen": 1803681792 }, { "epoch": 0.09, "objective/train/advantage_avg": 0.010627981275320053, "objective/train/docs_used": 1020148, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.486337661743164, "objective/train/original_loss": 2.486337661743164, "objective/train/theoretical_loss": 3.4602993944892955, "objective/train/tokens_used": 162999776, "objective/train/value_avg": -0.024658203125, "objective/train/value_loss": 0.0016179310623556376, "objective/train/value_max": -0.001220703125, "objective/train/value_min": -0.243408203125, "objective/train/value_reward_corr": 0.36027068010735636, "objective/train/value_std": 0.021575927734375, "objective/train/weight_avg": 1.0010708570480347, "objective/train/weighted_lm_loss": 2.4902284145355225, "objective/train/weights_max": 1.0208206176757812, "objective/train/weights_min": 0.9345300793647766, "theoretical_loss": 3.4602993944892955, "tokens_seen": 1804140544 }, { "epoch": 0.09, "learning_rate": 0.0009224054316197866, "loss": 1.3627, "theoretical_loss": 3.460288700826154, "tokens_seen": 1804206080 }, { "epoch": 0.09, "learning_rate": 0.0009220821209182024, "loss": 1.3346, "theoretical_loss": 3.4602031694163067, "tokens_seen": 1804730368 }, { "epoch": 0.09, "learning_rate": 0.0009217588102166182, "loss": 1.3598, "theoretical_loss": 3.4601176698053857, "tokens_seen": 1805254656 }, { "epoch": 0.09, "objective/train/advantage_avg": 0.004096715245395899, "objective/train/docs_used": 1021470, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.505892515182495, "objective/train/original_loss": 2.505892515182495, "objective/train/theoretical_loss": 3.4600322019723393, "objective/train/tokens_used": 164638176, "objective/train/value_avg": -0.0244903564453125, "objective/train/value_loss": 0.005556440446525812, "objective/train/value_max": -0.0012254714965820312, "objective/train/value_min": -0.411376953125, "objective/train/value_reward_corr": 0.3553018536294205, "objective/train/value_std": 0.01806640625, "objective/train/weight_avg": 1.0004371404647827, "objective/train/weighted_lm_loss": 2.5097756385803223, "objective/train/weights_max": 1.0415101051330566, "objective/train/weights_min": 0.9497764110565186, "theoretical_loss": 3.4600322019723393, "tokens_seen": 1805778944 }, { "epoch": 0.09, "learning_rate": 0.0009214354995150339, "loss": 1.39, "theoretical_loss": 3.4600322019723393, "tokens_seen": 1805778944 }, { "epoch": 0.09, "learning_rate": 0.0009211121888134498, "loss": 1.3468, "theoretical_loss": 3.4599467658961345, "tokens_seen": 1806303232 }, { "epoch": 0.09, "learning_rate": 0.0009207888781118655, "loss": 1.3413, "theoretical_loss": 3.4598613615557605, "tokens_seen": 1806827520 }, { "epoch": 0.09, "learning_rate": 0.0009204655674102813, "loss": 1.3575, "theoretical_loss": 3.4597759889302235, "tokens_seen": 1807351808 }, { "epoch": 0.09, "objective/train/advantage_avg": -0.008534092456102371, "objective/train/docs_used": 1022105, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.98738431930542, "objective/train/original_loss": 2.98738431930542, "objective/train/theoretical_loss": 3.459765319580936, "objective/train/tokens_used": 166276576, "objective/train/value_avg": -0.032073974609375, "objective/train/value_loss": 0.017543990164995193, "objective/train/value_max": -0.0009365081787109375, "objective/train/value_min": -0.81640625, "objective/train/value_reward_corr": 0.524797179247818, "objective/train/value_std": 0.03900146484375, "objective/train/weight_avg": 0.9992323517799377, "objective/train/weighted_lm_loss": 2.983170747756958, "objective/train/weights_max": 1.0429717302322388, "objective/train/weights_min": 0.909217894077301, "theoretical_loss": 3.459765319580936, "tokens_seen": 1807417344 }, { "epoch": 0.09, "learning_rate": 0.0009201422567086971, "loss": 1.3521, "theoretical_loss": 3.459690647998552, "tokens_seen": 1807876096 }, { "epoch": 0.09, "learning_rate": 0.0009198189460071128, "loss": 1.2863, "theoretical_loss": 3.4596053387397943, "tokens_seen": 1808400384 }, { "epoch": 0.09, "learning_rate": 0.0009194956353055286, "loss": 1.3687, "theoretical_loss": 3.459520061133017, "tokens_seen": 1808924672 }, { "epoch": 0.09, "objective/train/advantage_avg": 0.018418915569782257, "objective/train/docs_used": 1022607, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 3.22208309173584, "objective/train/original_loss": 3.22208309173584, "objective/train/theoretical_loss": 3.4594987466744938, "objective/train/tokens_used": 167914976, "objective/train/value_avg": -0.022796630859375, "objective/train/value_loss": 0.0019618684891611338, "objective/train/value_max": -0.0012693405151367188, "objective/train/value_min": -0.138671875, "objective/train/value_reward_corr": 0.030530455688500092, "objective/train/value_std": 0.01407623291015625, "objective/train/weight_avg": 1.0018515586853027, "objective/train/weighted_lm_loss": 3.2290968894958496, "objective/train/weights_max": 1.0136220455169678, "objective/train/weights_min": 0.9069201350212097, "theoretical_loss": 3.4594987466744938, "tokens_seen": 1809055744 }, { "epoch": 0.09, "learning_rate": 0.0009191723246039444, "loss": 1.3376, "theoretical_loss": 3.459434815157308, "tokens_seen": 1809448960 }, { "epoch": 0.09, "learning_rate": 0.0009188490139023602, "loss": 1.372, "theoretical_loss": 3.4593496007917754, "tokens_seen": 1809973248 }, { "epoch": 0.09, "learning_rate": 0.000918525703200776, "loss": 1.3575, "theoretical_loss": 3.459264418015546, "tokens_seen": 1810497536 }, { "epoch": 0.09, "objective/train/advantage_avg": 0.009265373460948467, "objective/train/docs_used": 1024104, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.5107479095458984, "objective/train/original_loss": 2.5107481479644775, "objective/train/theoretical_loss": 3.45923248261432, "objective/train/tokens_used": 169553376, "objective/train/value_avg": -0.027740478515625, "objective/train/value_loss": 0.0027151373215019703, "objective/train/value_max": -0.0011653900146484375, "objective/train/value_min": -0.367431640625, "objective/train/value_reward_corr": 0.43387602754552324, "objective/train/value_std": 0.035736083984375, "objective/train/weight_avg": 1.0009400844573975, "objective/train/weighted_lm_loss": 2.5143840312957764, "objective/train/weights_max": 1.0330325365066528, "objective/train/weights_min": 0.9164726138114929, "theoretical_loss": 3.45923248261432, "tokens_seen": 1810694144 }, { "epoch": 0.09, "learning_rate": 0.0009182023924991918, "loss": 1.3262, "theoretical_loss": 3.4591792668077668, "tokens_seen": 1811021824 }, { "epoch": 0.09, "learning_rate": 0.0009178790817976075, "loss": 1.3764, "theoretical_loss": 3.4590941471476047, "tokens_seen": 1811546112 }, { "epoch": 0.09, "learning_rate": 0.0009175557710960233, "loss": 1.3472, "theoretical_loss": 3.4590090590142464, "tokens_seen": 1812070400 }, { "epoch": 0.09, "objective/train/advantage_avg": 0.017964031547307968, "objective/train/docs_used": 1024786, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.206712484359741, "objective/train/original_loss": 2.206712484359741, "objective/train/theoretical_loss": 3.4589665267636196, "objective/train/tokens_used": 171191776, "objective/train/value_avg": -0.037353515625, "objective/train/value_loss": 0.0025908886454999447, "objective/train/value_max": -0.0015735626220703125, "objective/train/value_min": -0.50390625, "objective/train/value_reward_corr": 0.7125278549854666, "objective/train/value_std": 0.06640625, "objective/train/weight_avg": 1.0018094778060913, "objective/train/weighted_lm_loss": 2.2121036052703857, "objective/train/weights_max": 1.0434858798980713, "objective/train/weights_min": 0.9628862142562866, "theoretical_loss": 3.4589665267636196, "tokens_seen": 1812332544 }, { "epoch": 0.09, "learning_rate": 0.0009172324603944391, "loss": 1.3495, "theoretical_loss": 3.4589240023868983, "tokens_seen": 1812594688 }, { "epoch": 0.09, "learning_rate": 0.0009169091496928548, "loss": 1.3177, "theoretical_loss": 3.458838977244787, "tokens_seen": 1813118976 }, { "epoch": 0.09, "learning_rate": 0.0009165858389912707, "loss": 1.3292, "theoretical_loss": 3.458753983567158, "tokens_seen": 1813643264 }, { "epoch": 0.09, "objective/train/advantage_avg": 0.011178228072822094, "objective/train/docs_used": 1026014, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.8447437286376953, "objective/train/original_loss": 2.844743490219116, "objective/train/theoretical_loss": 3.458700878487483, "objective/train/tokens_used": 172830176, "objective/train/value_avg": -0.0219879150390625, "objective/train/value_loss": 0.0022421712055802345, "objective/train/value_max": -0.0008864402770996094, "objective/train/value_min": -0.36962890625, "objective/train/value_reward_corr": 0.2897632329217842, "objective/train/value_std": 0.016357421875, "objective/train/weight_avg": 1.0011287927627563, "objective/train/weighted_lm_loss": 2.848616600036621, "objective/train/weights_max": 1.018039345741272, "objective/train/weights_min": 0.9314413666725159, "theoretical_loss": 3.458700878487483, "tokens_seen": 1813970944 }, { "epoch": 0.09, "learning_rate": 0.0009162625282896863, "loss": 1.3697, "theoretical_loss": 3.458669021333277, "tokens_seen": 1814167552 }, { "epoch": 0.09, "learning_rate": 0.0009159392175881022, "loss": 1.3868, "theoretical_loss": 3.4585840905224283, "tokens_seen": 1814691840 }, { "epoch": 0.09, "learning_rate": 0.000915615906886518, "loss": 1.3421, "theoretical_loss": 3.4584991911139173, "tokens_seen": 1815216128 }, { "epoch": 0.09, "objective/train/advantage_avg": 0.004450577311217785, "objective/train/docs_used": 1026828, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.55802583694458, "objective/train/original_loss": 2.55802583694458, "objective/train/theoretical_loss": 3.4584355371528805, "objective/train/tokens_used": 174468576, "objective/train/value_avg": -0.030487060546875, "objective/train/value_loss": 0.005518625024706125, "objective/train/value_max": -0.0010728836059570312, "objective/train/value_min": -0.45751953125, "objective/train/value_reward_corr": 0.3104376912779205, "objective/train/value_std": 0.024169921875, "objective/train/weight_avg": 1.0004724264144897, "objective/train/weighted_lm_loss": 2.559920072555542, "objective/train/weights_max": 1.039858102798462, "objective/train/weights_min": 0.9235664010047913, "theoretical_loss": 3.4584355371528805, "tokens_seen": 1815609344 }, { "epoch": 0.09, "learning_rate": 0.0009152925961849337, "loss": 1.3512, "theoretical_loss": 3.4584143230870694, "tokens_seen": 1815740416 }, { "epoch": 0.09, "learning_rate": 0.0009149692854833496, "loss": 1.3537, "theoretical_loss": 3.458329486421227, "tokens_seen": 1816264704 }, { "epoch": 0.09, "learning_rate": 0.0009146459747817652, "loss": 1.2899, "theoretical_loss": 3.4582446810957546, "tokens_seen": 1816788992 }, { "epoch": 0.09, "objective/train/advantage_avg": 0.01670873910188675, "objective/train/docs_used": 1028029, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.580246925354004, "objective/train/original_loss": 2.580246925354004, "objective/train/theoretical_loss": 3.458170502128655, "objective/train/tokens_used": 176106976, "objective/train/value_avg": -0.0262451171875, "objective/train/value_loss": 0.0016571712912991643, "objective/train/value_max": -0.0014667510986328125, "objective/train/value_min": -0.287841796875, "objective/train/value_reward_corr": 0.1887211517270196, "objective/train/value_std": 0.0212249755859375, "objective/train/weight_avg": 1.0016791820526123, "objective/train/weighted_lm_loss": 2.585279941558838, "objective/train/weights_max": 1.0281622409820557, "objective/train/weights_min": 0.9472679495811462, "theoretical_loss": 3.458170502128655, "tokens_seen": 1817247744 }, { "epoch": 0.1, "learning_rate": 0.000914322664080181, "loss": 1.3446, "theoretical_loss": 3.4581599070900344, "tokens_seen": 1817313280 }, { "epoch": 0.1, "learning_rate": 0.0009139993533785969, "loss": 1.3762, "theoretical_loss": 3.4580751643834704, "tokens_seen": 1817837568 }, { "epoch": 0.1, "learning_rate": 0.0009136760426770126, "loss": 1.3527, "theoretical_loss": 3.457990452955483, "tokens_seen": 1818361856 }, { "epoch": 0.1, "objective/train/advantage_avg": 0.013686026446521282, "objective/train/docs_used": 1028792, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.661067008972168, "objective/train/original_loss": 2.661066770553589, "objective/train/theoretical_loss": 3.457905772785515, "objective/train/tokens_used": 177745376, "objective/train/value_avg": -0.0313720703125, "objective/train/value_loss": 0.003775616642087698, "objective/train/value_max": -0.0011034011840820312, "objective/train/value_min": -0.6103515625, "objective/train/value_reward_corr": 0.5073443386654255, "objective/train/value_std": 0.051361083984375, "objective/train/weight_avg": 1.0013872385025024, "objective/train/weighted_lm_loss": 2.666315793991089, "objective/train/weights_max": 1.0357862710952759, "objective/train/weights_min": 0.9238344430923462, "theoretical_loss": 3.457905772785515, "tokens_seen": 1818886144 }, { "epoch": 0.1, "learning_rate": 0.0009133527319754285, "loss": 1.3742, "theoretical_loss": 3.457905772785515, "tokens_seen": 1818886144 }, { "epoch": 0.1, "learning_rate": 0.0009130294212738441, "loss": 1.3784, "theoretical_loss": 3.457821123853026, "tokens_seen": 1819410432 }, { "epoch": 0.1, "learning_rate": 0.0009127061105722599, "loss": 1.3495, "theoretical_loss": 3.457736506137498, "tokens_seen": 1819934720 }, { "epoch": 0.1, "learning_rate": 0.0009123827998706758, "loss": 1.3817, "theoretical_loss": 3.4576519196184297, "tokens_seen": 1820459008 }, { "epoch": 0.1, "objective/train/advantage_avg": 0.011889573186635971, "objective/train/docs_used": 1029550, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.4395668506622314, "objective/train/original_loss": 2.4395673274993896, "objective/train/theoretical_loss": 3.4576413484960264, "objective/train/tokens_used": 179383776, "objective/train/value_avg": -0.02716064453125, "objective/train/value_loss": 0.006041501648724079, "objective/train/value_max": -0.0008897781372070312, "objective/train/value_min": -0.99462890625, "objective/train/value_reward_corr": 0.39351005823572544, "objective/train/value_std": 0.042999267578125, "objective/train/weight_avg": 1.0012186765670776, "objective/train/weighted_lm_loss": 2.4443342685699463, "objective/train/weights_max": 1.071531891822815, "objective/train/weights_min": 0.9079253673553467, "theoretical_loss": 3.4576413484960264, "tokens_seen": 1820524544 }, { "epoch": 0.1, "learning_rate": 0.0009120594891690915, "loss": 1.349, "theoretical_loss": 3.4575673642753397, "tokens_seen": 1820983296 }, { "epoch": 0.1, "learning_rate": 0.0009117361784675074, "loss": 1.3803, "theoretical_loss": 3.457482840087768, "tokens_seen": 1821507584 }, { "epoch": 0.1, "learning_rate": 0.000911412867765923, "loss": 1.3977, "theoretical_loss": 3.457398347035271, "tokens_seen": 1822031872 }, { "epoch": 0.1, "objective/train/advantage_avg": 0.020278722047805786, "objective/train/docs_used": 1030259, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.425473928451538, "objective/train/original_loss": 2.425474166870117, "objective/train/theoretical_loss": 3.4573772286346087, "objective/train/tokens_used": 181022176, "objective/train/value_avg": -0.02947998046875, "objective/train/value_loss": 0.0015365226427093148, "objective/train/value_max": -0.002010345458984375, "objective/train/value_min": -0.380126953125, "objective/train/value_reward_corr": 0.15759146671531335, "objective/train/value_std": 0.0277252197265625, "objective/train/weight_avg": 1.0020356178283691, "objective/train/weighted_lm_loss": 2.4316399097442627, "objective/train/weights_max": 1.0375102758407593, "objective/train/weights_min": 0.9559341073036194, "theoretical_loss": 3.4573772286346087, "tokens_seen": 1822162944 }, { "epoch": 0.1, "learning_rate": 0.0009110895570643388, "loss": 1.3884, "theoretical_loss": 3.4573138850974265, "tokens_seen": 1822556160 }, { "epoch": 0.1, "learning_rate": 0.0009107662463627547, "loss": 1.3467, "theoretical_loss": 3.45722945425383, "tokens_seen": 1823080448 }, { "epoch": 0.1, "learning_rate": 0.0009104429356611704, "loss": 1.3262, "theoretical_loss": 3.457145054484098, "tokens_seen": 1823604736 }, { "epoch": 0.1, "objective/train/advantage_avg": 0.008096680045127869, "objective/train/docs_used": 1031478, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.5234389305114746, "objective/train/original_loss": 2.5234389305114746, "objective/train/theoretical_loss": 3.457113412577522, "objective/train/tokens_used": 182660576, "objective/train/value_avg": -0.024169921875, "objective/train/value_loss": 0.00654465751722455, "objective/train/value_max": -0.0009965896606445312, "objective/train/value_min": -0.939453125, "objective/train/value_reward_corr": 0.4027768820537409, "objective/train/value_std": 0.03466796875, "objective/train/weight_avg": 1.000841736793518, "objective/train/weighted_lm_loss": 2.526357412338257, "objective/train/weights_max": 1.0689239501953125, "objective/train/weights_min": 0.9077720046043396, "theoretical_loss": 3.457113412577522, "tokens_seen": 1823801344 }, { "epoch": 0.1, "learning_rate": 0.0009101196249595862, "loss": 1.382, "theoretical_loss": 3.457060685767865, "tokens_seen": 1824129024 }, { "epoch": 0.1, "learning_rate": 0.0009097963142580019, "loss": 1.3801, "theoretical_loss": 3.456976348084784, "tokens_seen": 1824653312 }, { "epoch": 0.1, "learning_rate": 0.0009094730035564177, "loss": 1.3475, "theoretical_loss": 3.4568920414145294, "tokens_seen": 1825177600 }, { "debugging/Self-BLEU-5": 0.2978020507513697, "debugging/distinct-1-grams": 0.7493044984092464, "debugging/distinct-2-grams": 0.9684160445030011, "debugging/entropy-1-grams": 4.978374022966436, "debugging/entropy-2-grams": 5.581523811888752, "debugging/length": 564.3333333333334, "debugging/num_segments": 3, "debugging/raw_token_scores_avg": 0.06154995411634445, "debugging/raw_token_scores_std": 0.16516533493995667, "epoch": 0.1, "objective/train/advantage_avg": 0.016863783821463585, "objective/train/docs_used": 1032081, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.336369752883911, "objective/train/original_loss": 2.336369752883911, "objective/train/theoretical_loss": 3.456849899702865, "objective/train/tokens_used": 184298976, "objective/train/value_avg": -0.07843017578125, "objective/train/value_loss": 0.008144880644977093, "objective/train/value_max": -0.0006799697875976562, "objective/train/value_min": -0.91748046875, "objective/train/value_reward_corr": 0.8517878801507224, "objective/train/value_std": 0.1600341796875, "objective/train/weight_avg": 1.0017268657684326, "objective/train/weighted_lm_loss": 2.338615655899048, "objective/train/weights_max": 1.0377262830734253, "objective/train/weights_min": 0.9440195560455322, "theoretical_loss": 3.456849899702865, "tokens_seen": 1825439744 }, { "epoch": 0.1, "learning_rate": 0.0009091496928548336, "loss": 1.3193, "theoretical_loss": 3.456807765736793, "tokens_seen": 1825701888 }, { "epoch": 0.1, "learning_rate": 0.0009088263821532493, "loss": 1.3066, "theoretical_loss": 3.456723521031286, "tokens_seen": 1826226176 }, { "epoch": 0.1, "learning_rate": 0.0009085030714516651, "loss": 1.3045, "theoretical_loss": 3.4566393072777393, "tokens_seen": 1826750464 }, { "epoch": 0.1, "objective/train/advantage_avg": 0.005332792643457651, "objective/train/docs_used": 1033277, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.745845079421997, "objective/train/original_loss": 2.745845317840576, "objective/train/theoretical_loss": 3.4565866893905657, "objective/train/tokens_used": 185937376, "objective/train/value_avg": -0.03729248046875, "objective/train/value_loss": 0.007874896749854088, "objective/train/value_max": -0.000743865966796875, "objective/train/value_min": -0.9638671875, "objective/train/value_reward_corr": 0.6469564054515377, "objective/train/value_std": 0.0733642578125, "objective/train/weight_avg": 1.0005720853805542, "objective/train/weighted_lm_loss": 2.7482707500457764, "objective/train/weights_max": 1.0813016891479492, "objective/train/weights_min": 0.912566602230072, "theoretical_loss": 3.4565866893905657, "tokens_seen": 1827078144 }, { "epoch": 0.1, "learning_rate": 0.0009081797607500808, "loss": 1.385, "theoretical_loss": 3.4565551244559023, "tokens_seen": 1827274752 }, { "epoch": 0.1, "learning_rate": 0.0009078564500484966, "loss": 1.3762, "theoretical_loss": 3.456470972545543, "tokens_seen": 1827799040 }, { "epoch": 0.1, "learning_rate": 0.0009075331393469123, "loss": 1.3677, "theoretical_loss": 3.4563868515264504, "tokens_seen": 1828323328 }, { "epoch": 0.1, "objective/train/advantage_avg": 0.013400369323790073, "objective/train/docs_used": 1034021, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.758011817932129, "objective/train/original_loss": 2.75801157951355, "objective/train/theoretical_loss": 3.456323781022376, "objective/train/tokens_used": 187575776, "objective/train/value_avg": -0.02099609375, "objective/train/value_loss": 0.0012145718792453408, "objective/train/value_max": -0.0007853507995605469, "objective/train/value_min": -0.5693359375, "objective/train/value_reward_corr": 0.292689297476701, "objective/train/value_std": 0.017181396484375, "objective/train/weight_avg": 1.0013459920883179, "objective/train/weighted_lm_loss": 2.762784719467163, "objective/train/weights_max": 1.0284851789474487, "objective/train/weights_min": 0.9446413516998291, "theoretical_loss": 3.456323781022376, "tokens_seen": 1828716544 }, { "epoch": 0.1, "learning_rate": 0.0009072098286453282, "loss": 1.3551, "theoretical_loss": 3.4563027613784305, "tokens_seen": 1828847616 }, { "epoch": 0.1, "learning_rate": 0.000906886517943744, "loss": 1.3477, "theoretical_loss": 3.4562187020813084, "tokens_seen": 1829371904 }, { "epoch": 0.1, "learning_rate": 0.0009065632072421597, "loss": 1.334, "theoretical_loss": 3.4561346736149297, "tokens_seen": 1829896192 }, { "epoch": 0.1, "objective/train/advantage_avg": 0.011075984686613083, "objective/train/docs_used": 1035168, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.395397186279297, "objective/train/original_loss": 2.3953967094421387, "objective/train/theoretical_loss": 3.456061173981862, "objective/train/tokens_used": 189214176, "objective/train/value_avg": -0.0241546630859375, "objective/train/value_loss": 0.003936620429158211, "objective/train/value_max": -0.0007381439208984375, "objective/train/value_min": -0.8828125, "objective/train/value_reward_corr": 0.5122640028903909, "objective/train/value_std": 0.03277587890625, "objective/train/weight_avg": 1.0011268854141235, "objective/train/weighted_lm_loss": 2.398672342300415, "objective/train/weights_max": 1.0367008447647095, "objective/train/weights_min": 0.9118328094482422, "theoretical_loss": 3.456061173981862, "tokens_seen": 1830354944 }, { "epoch": 0.1, "learning_rate": 0.0009062398965405755, "loss": 1.358, "theoretical_loss": 3.4560506759591574, "tokens_seen": 1830420480 }, { "epoch": 0.1, "learning_rate": 0.0009059165858389912, "loss": 1.3601, "theoretical_loss": 3.4559667090938744, "tokens_seen": 1830944768 }, { "epoch": 0.1, "learning_rate": 0.0009055932751374071, "loss": 1.3552, "theoretical_loss": 3.455882772998981, "tokens_seen": 1831469056 }, { "epoch": 0.1, "objective/train/advantage_avg": 0.013324304483830929, "objective/train/docs_used": 1035908, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.8835527896881104, "objective/train/original_loss": 2.8835525512695312, "objective/train/theoretical_loss": 3.4557988676543987, "objective/train/tokens_used": 190852576, "objective/train/value_avg": -0.0258331298828125, "objective/train/value_loss": 0.0031281886622309685, "objective/train/value_max": -0.001461029052734375, "objective/train/value_min": -0.88623046875, "objective/train/value_reward_corr": 0.5278142782576064, "objective/train/value_std": 0.03302001953125, "objective/train/weight_avg": 1.0013478994369507, "objective/train/weighted_lm_loss": 2.8881900310516357, "objective/train/weights_max": 1.0618212223052979, "objective/train/weights_min": 0.9062968492507935, "theoretical_loss": 3.4557988676543987, "tokens_seen": 1831993344 }, { "epoch": 0.1, "learning_rate": 0.0009052699644358229, "loss": 1.3749, "theoretical_loss": 3.4557988676543987, "tokens_seen": 1831993344 }, { "epoch": 0.1, "learning_rate": 0.0009049466537342385, "loss": 1.3542, "theoretical_loss": 3.4557149930400657, "tokens_seen": 1832517632 }, { "epoch": 0.1, "learning_rate": 0.0009046233430326544, "loss": 1.3572, "theoretical_loss": 3.455631149135941, "tokens_seen": 1833041920 }, { "epoch": 0.1, "learning_rate": 0.0009043000323310701, "loss": 1.3792, "theoretical_loss": 3.4555473359219997, "tokens_seen": 1833566208 }, { "epoch": 0.1, "objective/train/advantage_avg": 0.012445789761841297, "objective/train/docs_used": 1036404, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.7219645977020264, "objective/train/original_loss": 2.7219645977020264, "objective/train/theoretical_loss": 3.4555368614271647, "objective/train/tokens_used": 192490976, "objective/train/value_avg": -0.0240936279296875, "objective/train/value_loss": 0.003354343120008707, "objective/train/value_max": -0.001438140869140625, "objective/train/value_min": -0.400634765625, "objective/train/value_reward_corr": 0.21654513389565339, "objective/train/value_std": 0.0186004638671875, "objective/train/weight_avg": 1.0012609958648682, "objective/train/weighted_lm_loss": 2.7261531352996826, "objective/train/weights_max": 1.0317267179489136, "objective/train/weights_min": 0.9160973429679871, "theoretical_loss": 3.4555368614271647, "tokens_seen": 1833631744 }, { "epoch": 0.11, "learning_rate": 0.000903976721629486, "loss": 1.3577, "theoretical_loss": 3.4554635533782387, "tokens_seen": 1834090496 }, { "epoch": 0.11, "learning_rate": 0.0009036534109279018, "loss": 1.3762, "theoretical_loss": 3.4553798014846717, "tokens_seen": 1834614784 }, { "epoch": 0.11, "learning_rate": 0.0009033301002263175, "loss": 1.3797, "theoretical_loss": 3.4552960802213315, "tokens_seen": 1835139072 }, { "epoch": 0.11, "objective/train/advantage_avg": 0.011568804271519184, "objective/train/docs_used": 1037817, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.749013900756836, "objective/train/original_loss": 2.749013900756836, "objective/train/theoretical_loss": 3.455275154689131, "objective/train/tokens_used": 194129376, "objective/train/value_avg": -0.022186279296875, "objective/train/value_loss": 0.0019368311623111367, "objective/train/value_max": -0.0010204315185546875, "objective/train/value_min": -0.2130126953125, "objective/train/value_reward_corr": 0.48370147129938634, "objective/train/value_std": 0.0193939208984375, "objective/train/weight_avg": 1.001166582107544, "objective/train/weighted_lm_loss": 2.753288507461548, "objective/train/weights_max": 1.0212608575820923, "objective/train/weights_min": 0.9414647221565247, "theoretical_loss": 3.455275154689131, "tokens_seen": 1835270144 }, { "epoch": 0.11, "learning_rate": 0.0009030067895247333, "loss": 1.3643, "theoretical_loss": 3.4552123895682696, "tokens_seen": 1835663360 }, { "epoch": 0.11, "learning_rate": 0.000902683478823149, "loss": 1.3709, "theoretical_loss": 3.4551287295055575, "tokens_seen": 1836187648 }, { "epoch": 0.11, "learning_rate": 0.0009023601681215649, "loss": 1.4017, "theoretical_loss": 3.4550451000132822, "tokens_seen": 1836711936 }, { "epoch": 0.11, "objective/train/advantage_avg": 0.008040018379688263, "objective/train/docs_used": 1038484, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 3.0827157497406006, "objective/train/original_loss": 3.0827157497406006, "objective/train/theoretical_loss": 3.45501374683106, "objective/train/tokens_used": 195767776, "objective/train/value_avg": -0.0308380126953125, "objective/train/value_loss": 0.005961114075034857, "objective/train/value_max": -0.0013151168823242188, "objective/train/value_min": -0.436767578125, "objective/train/value_reward_corr": 0.527228071473447, "objective/train/value_std": 0.04803466796875, "objective/train/weight_avg": 1.0008333921432495, "objective/train/weighted_lm_loss": 3.086162805557251, "objective/train/weights_max": 1.0362770557403564, "objective/train/weights_min": 0.9102947115898132, "theoretical_loss": 3.45501374683106, "tokens_seen": 1836908544 }, { "epoch": 0.11, "learning_rate": 0.0009020368574199807, "loss": 1.3942, "theoretical_loss": 3.4549615010715535, "tokens_seen": 1837236224 }, { "epoch": 0.11, "learning_rate": 0.0009017135467183964, "loss": 1.3839, "theoretical_loss": 3.454877932660496, "tokens_seen": 1837760512 }, { "epoch": 0.11, "learning_rate": 0.0009013902360168122, "loss": 1.3645, "theoretical_loss": 3.454794394760256, "tokens_seen": 1838284800 }, { "epoch": 0.11, "objective/train/advantage_avg": 0.018331820145249367, "objective/train/docs_used": 1039769, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.6463239192962646, "objective/train/original_loss": 2.6463241577148438, "objective/train/theoretical_loss": 3.4547526372454924, "objective/train/tokens_used": 197406176, "objective/train/value_avg": -0.039947509765625, "objective/train/value_loss": 0.009630311280488968, "objective/train/value_max": -0.0012493133544921875, "objective/train/value_min": -0.92236328125, "objective/train/value_reward_corr": 0.3306335285275551, "objective/train/value_std": 0.07275390625, "objective/train/weight_avg": 1.0018807649612427, "objective/train/weighted_lm_loss": 2.6504368782043457, "objective/train/weights_max": 1.085198163986206, "objective/train/weights_min": 0.9076930284500122, "theoretical_loss": 3.4547526372454924, "tokens_seen": 1838546944 }, { "epoch": 0.11, "learning_rate": 0.0009010669253152279, "loss": 1.3545, "theoretical_loss": 3.4547108873509957, "tokens_seen": 1838809088 }, { "epoch": 0.11, "learning_rate": 0.0009007436146136437, "loss": 1.3688, "theoretical_loss": 3.454627410412898, "tokens_seen": 1839333376 }, { "epoch": 0.11, "learning_rate": 0.0009004203039120596, "loss": 1.3489, "theoretical_loss": 3.454543963926162, "tokens_seen": 1839857664 }, { "epoch": 0.11, "objective/train/advantage_avg": 0.00937011931091547, "objective/train/docs_used": 1040587, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.9872031211853027, "objective/train/original_loss": 2.9872028827667236, "objective/train/theoretical_loss": 3.454491825326745, "objective/train/tokens_used": 199044576, "objective/train/value_avg": -0.0240631103515625, "objective/train/value_loss": 0.003533502807840705, "objective/train/value_max": -0.0015850067138671875, "objective/train/value_min": -0.60107421875, "objective/train/value_reward_corr": 0.36039279887567954, "objective/train/value_std": 0.0239715576171875, "objective/train/weight_avg": 1.0009543895721436, "objective/train/weighted_lm_loss": 2.990556001663208, "objective/train/weights_max": 1.0497000217437744, "objective/train/weights_min": 0.9109688997268677, "theoretical_loss": 3.454491825326745, "tokens_seen": 1840185344 }, { "epoch": 0.11, "learning_rate": 0.0009000969932104753, "loss": 1.3454, "theoretical_loss": 3.4544605478710086, "tokens_seen": 1840381952 }, { "epoch": 0.11, "learning_rate": 0.0008997736825088911, "loss": 1.3197, "theoretical_loss": 3.454377162227674, "tokens_seen": 1840906240 }, { "epoch": 0.11, "learning_rate": 0.0008994503718073068, "loss": 1.3273, "theoretical_loss": 3.4542938069764144, "tokens_seen": 1841430528 }, { "epoch": 0.11, "objective/train/advantage_avg": 0.0067117162980139256, "objective/train/docs_used": 1041999, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.3058645725250244, "objective/train/original_loss": 2.3058645725250244, "objective/train/theoretical_loss": 3.4542313104709024, "objective/train/tokens_used": 200682976, "objective/train/value_avg": -0.0214691162109375, "objective/train/value_loss": 0.004382243379950523, "objective/train/value_max": -0.0010204315185546875, "objective/train/value_min": -0.798828125, "objective/train/value_reward_corr": 0.3924880830616596, "objective/train/value_std": 0.02630615234375, "objective/train/weight_avg": 1.0006928443908691, "objective/train/weighted_lm_loss": 2.308091163635254, "objective/train/weights_max": 1.0820709466934204, "objective/train/weights_min": 0.921471357345581, "theoretical_loss": 3.4542313104709024, "tokens_seen": 1841823744 }, { "epoch": 0.11, "learning_rate": 0.0008991270611057226, "loss": 1.3597, "theoretical_loss": 3.454210482097505, "tokens_seen": 1841954816 }, { "epoch": 0.11, "learning_rate": 0.0008988037504041385, "loss": 1.3348, "theoretical_loss": 3.454127187571237, "tokens_seen": 1842479104 }, { "epoch": 0.11, "learning_rate": 0.0008984804397025542, "loss": 1.368, "theoretical_loss": 3.454043923377923, "tokens_seen": 1843003392 }, { "epoch": 0.11, "objective/train/advantage_avg": 0.023547906428575516, "objective/train/docs_used": 1042423, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 3.10109281539917, "objective/train/original_loss": 3.10109281539917, "objective/train/theoretical_loss": 3.4539710920758093, "objective/train/tokens_used": 202321376, "objective/train/value_avg": -0.028045654296875, "objective/train/value_loss": 0.0015745371347293258, "objective/train/value_max": -0.0016355514526367188, "objective/train/value_min": -0.366943359375, "objective/train/value_reward_corr": 0.08687829949978458, "objective/train/value_std": 0.0225067138671875, "objective/train/weight_avg": 1.002362608909607, "objective/train/weighted_lm_loss": 3.1102919578552246, "objective/train/weights_max": 1.0365887880325317, "objective/train/weights_min": 0.9630240201950073, "theoretical_loss": 3.4539710920758093, "tokens_seen": 1843462144 }, { "epoch": 0.11, "learning_rate": 0.0008981571290009699, "loss": 1.3699, "theoretical_loss": 3.453960689497891, "tokens_seen": 1843527680 }, { "epoch": 0.11, "learning_rate": 0.0008978338182993857, "loss": 1.3406, "theoretical_loss": 3.453877485911491, "tokens_seen": 1844051968 }, { "epoch": 0.11, "learning_rate": 0.0008975105075978015, "loss": 1.3744, "theoretical_loss": 3.453794312599088, "tokens_seen": 1844576256 }, { "epoch": 0.11, "objective/train/advantage_avg": 0.01281779259443283, "objective/train/docs_used": 1043833, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.177685260772705, "objective/train/original_loss": 2.177685022354126, "objective/train/theoretical_loss": 3.4537111695410667, "objective/train/tokens_used": 203959776, "objective/train/value_avg": -0.0193328857421875, "objective/train/value_loss": 0.0011891117319464684, "objective/train/value_max": -0.0011425018310546875, "objective/train/value_min": -0.399658203125, "objective/train/value_reward_corr": 0.17842299099971787, "objective/train/value_std": 0.015350341796875, "objective/train/weight_avg": 1.0012876987457275, "objective/train/weighted_lm_loss": 2.1813805103302, "objective/train/weights_max": 1.0406731367111206, "objective/train/weights_min": 0.9096883535385132, "theoretical_loss": 3.4537111695410667, "tokens_seen": 1845100544 }, { "epoch": 0.11, "learning_rate": 0.0008971871968962173, "loss": 1.3545, "theoretical_loss": 3.4537111695410667, "tokens_seen": 1845100544 }, { "epoch": 0.11, "learning_rate": 0.0008968638861946331, "loss": 1.3082, "theoretical_loss": 3.4536280567178297, "tokens_seen": 1845624832 }, { "epoch": 0.11, "learning_rate": 0.0008965405754930488, "loss": 1.3658, "theoretical_loss": 3.453544974109798, "tokens_seen": 1846149120 }, { "epoch": 0.11, "learning_rate": 0.0008962172647914646, "loss": 1.3257, "theoretical_loss": 3.453461921697411, "tokens_seen": 1846673408 }, { "epoch": 0.11, "objective/train/advantage_avg": 0.019865481182932854, "objective/train/docs_used": 1044447, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.4344873428344727, "objective/train/original_loss": 2.4344871044158936, "objective/train/theoretical_loss": 3.4534515422680205, "objective/train/tokens_used": 205598176, "objective/train/value_avg": -0.035430908203125, "objective/train/value_loss": 0.004597270395606756, "objective/train/value_max": -0.0011692047119140625, "objective/train/value_min": -0.978515625, "objective/train/value_reward_corr": 0.7130333836098193, "objective/train/value_std": 0.07232666015625, "objective/train/weight_avg": 1.002009391784668, "objective/train/weighted_lm_loss": 2.440577745437622, "objective/train/weights_max": 1.0856362581253052, "objective/train/weights_min": 0.915577232837677, "theoretical_loss": 3.4534515422680205, "tokens_seen": 1846738944 }, { "epoch": 0.11, "learning_rate": 0.0008958939540898804, "loss": 1.3475, "theoretical_loss": 3.4533788994611267, "tokens_seen": 1847197696 }, { "epoch": 0.11, "learning_rate": 0.0008955706433882961, "loss": 1.345, "theoretical_loss": 3.4532959073814204, "tokens_seen": 1847721984 }, { "epoch": 0.11, "learning_rate": 0.000895247332686712, "loss": 1.3311, "theoretical_loss": 3.4532129454387857, "tokens_seen": 1848246272 }, { "epoch": 0.11, "objective/train/advantage_avg": -0.0015411610947921872, "objective/train/docs_used": 1045763, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.6061105728149414, "objective/train/original_loss": 2.6061110496520996, "objective/train/theoretical_loss": 3.4531922096597603, "objective/train/tokens_used": 207236576, "objective/train/value_avg": -0.02679443359375, "objective/train/value_loss": 0.00499833794310689, "objective/train/value_max": -0.0006289482116699219, "objective/train/value_min": -0.6240234375, "objective/train/value_reward_corr": 0.5280622038481751, "objective/train/value_std": 0.028045654296875, "objective/train/weight_avg": 0.9998705983161926, "objective/train/weighted_lm_loss": 2.608754873275757, "objective/train/weights_max": 1.0231578350067139, "objective/train/weights_min": 0.9191367626190186, "theoretical_loss": 3.4531922096597603, "tokens_seen": 1848377344 }, { "epoch": 0.11, "learning_rate": 0.0008949240219851277, "loss": 1.3197, "theoretical_loss": 3.453130013613735, "tokens_seen": 1848770560 }, { "epoch": 0.11, "learning_rate": 0.0008946007112835435, "loss": 1.3888, "theoretical_loss": 3.4530471118867982, "tokens_seen": 1849294848 }, { "epoch": 0.11, "learning_rate": 0.0008942774005819593, "loss": 1.3507, "theoretical_loss": 3.452964240238524, "tokens_seen": 1849819136 }, { "epoch": 0.11, "objective/train/advantage_avg": 0.012501529417932034, "objective/train/docs_used": 1046397, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.796678066253662, "objective/train/original_loss": 2.796677827835083, "objective/train/theoretical_loss": 3.4529331711211086, "objective/train/tokens_used": 208874976, "objective/train/value_avg": -0.0219268798828125, "objective/train/value_loss": 0.0025876746512949467, "objective/train/value_max": -0.0016679763793945312, "objective/train/value_min": -0.81982421875, "objective/train/value_reward_corr": 0.5424291116935668, "objective/train/value_std": 0.02557373046875, "objective/train/weight_avg": 1.001262903213501, "objective/train/weighted_lm_loss": 2.80112624168396, "objective/train/weights_max": 1.0398235321044922, "objective/train/weights_min": 0.9088719487190247, "theoretical_loss": 3.4529331711211086, "tokens_seen": 1850015744 }, { "epoch": 0.12, "learning_rate": 0.000893954089880375, "loss": 1.3331, "theoretical_loss": 3.4528813986494775, "tokens_seen": 1850343424 }, { "epoch": 0.12, "learning_rate": 0.0008936307791787909, "loss": 1.3524, "theoretical_loss": 3.4527985871002445, "tokens_seen": 1850867712 }, { "epoch": 0.12, "learning_rate": 0.0008933074684772065, "loss": 1.356, "theoretical_loss": 3.452715805571427, "tokens_seen": 1851392000 }, { "epoch": 0.12, "objective/train/advantage_avg": 0.010834094136953354, "objective/train/docs_used": 1047383, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.554520845413208, "objective/train/original_loss": 2.554520606994629, "objective/train/theoretical_loss": 3.452674426058617, "objective/train/tokens_used": 210513376, "objective/train/value_avg": -0.0194244384765625, "objective/train/value_loss": 0.0023255962878465652, "objective/train/value_max": -0.0014047622680664062, "objective/train/value_min": -0.2119140625, "objective/train/value_reward_corr": 0.12820834052490684, "objective/train/value_std": 0.0139007568359375, "objective/train/weight_avg": 1.0010948181152344, "objective/train/weighted_lm_loss": 2.5581321716308594, "objective/train/weights_max": 1.0212607383728027, "objective/train/weights_min": 0.9159020185470581, "theoretical_loss": 3.452674426058617, "tokens_seen": 1851654144 }, { "epoch": 0.12, "learning_rate": 0.0008929841577756224, "loss": 1.313, "theoretical_loss": 3.4526330540436447, "tokens_seen": 1851916288 }, { "epoch": 0.12, "learning_rate": 0.0008926608470740382, "loss": 1.3567, "theoretical_loss": 3.4525503324975366, "tokens_seen": 1852440576 }, { "epoch": 0.12, "learning_rate": 0.0008923375363724539, "loss": 1.3298, "theoretical_loss": 3.452467640913759, "tokens_seen": 1852964864 }, { "epoch": 0.12, "objective/train/advantage_avg": 0.011183884926140308, "objective/train/docs_used": 1048172, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.2745420932769775, "objective/train/original_loss": 2.2745418548583984, "objective/train/theoretical_loss": 3.4524159738805564, "objective/train/tokens_used": 212151776, "objective/train/value_avg": -0.02484130859375, "objective/train/value_loss": 0.0017835103208199143, "objective/train/value_max": -0.0014162063598632812, "objective/train/value_min": -0.9853515625, "objective/train/value_reward_corr": 0.6217730075822601, "objective/train/value_std": 0.03448486328125, "objective/train/weight_avg": 1.0011271238327026, "objective/train/weighted_lm_loss": 2.27793025970459, "objective/train/weights_max": 1.1023811101913452, "objective/train/weights_min": 0.906582772731781, "theoretical_loss": 3.4524159738805564, "tokens_seen": 1853292544 }, { "epoch": 0.12, "learning_rate": 0.0008920142256708698, "loss": 1.3096, "theoretical_loss": 3.4523849792729866, "tokens_seen": 1853489152 }, { "epoch": 0.12, "learning_rate": 0.0008916909149692854, "loss": 1.3398, "theoretical_loss": 3.4523023475559107, "tokens_seen": 1854013440 }, { "epoch": 0.12, "learning_rate": 0.0008913676042677012, "loss": 1.4, "theoretical_loss": 3.4522197457432426, "tokens_seen": 1854537728 }, { "epoch": 0.12, "objective/train/advantage_avg": 0.018251460045576096, "objective/train/docs_used": 1049323, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.7758071422576904, "objective/train/original_loss": 2.7758071422576904, "objective/train/theoretical_loss": 3.452157813996915, "objective/train/tokens_used": 213790176, "objective/train/value_avg": -0.0251922607421875, "objective/train/value_loss": 0.001722839311696589, "objective/train/value_max": -0.001155853271484375, "objective/train/value_min": -0.387451171875, "objective/train/value_reward_corr": 0.14230095946921784, "objective/train/value_std": 0.0207366943359375, "objective/train/weight_avg": 1.0018337965011597, "objective/train/weighted_lm_loss": 2.7817487716674805, "objective/train/weights_max": 1.0255498886108398, "objective/train/weights_min": 0.9264189004898071, "theoretical_loss": 3.452157813996915, "tokens_seen": 1854930944 }, { "epoch": 0.12, "learning_rate": 0.0008910442935661171, "loss": 1.3559, "theoretical_loss": 3.45213717381571, "tokens_seen": 1855062016 }, { "epoch": 0.12, "learning_rate": 0.0008907209828645328, "loss": 1.3323, "theoretical_loss": 3.4520546317540584, "tokens_seen": 1855586304 }, { "epoch": 0.12, "learning_rate": 0.0008903976721629487, "loss": 1.3103, "theoretical_loss": 3.451972119539051, "tokens_seen": 1856110592 }, { "epoch": 0.12, "objective/train/advantage_avg": 0.004051879048347473, "objective/train/docs_used": 1049929, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.8554153442382812, "objective/train/original_loss": 2.8554153442382812, "objective/train/theoretical_loss": 3.4518999458193877, "objective/train/tokens_used": 215428576, "objective/train/value_avg": -0.03118896484375, "objective/train/value_loss": 0.009460647590458393, "objective/train/value_max": -0.000583648681640625, "objective/train/value_min": -0.94189453125, "objective/train/value_reward_corr": 0.5531453622381912, "objective/train/value_std": 0.04888916015625, "objective/train/weight_avg": 1.0004515647888184, "objective/train/weighted_lm_loss": 2.857351541519165, "objective/train/weights_max": 1.0660535097122192, "objective/train/weights_min": 0.919568657875061, "theoretical_loss": 3.4518999458193877, "tokens_seen": 1856569344 }, { "epoch": 0.12, "learning_rate": 0.0008900743614613643, "loss": 1.386, "theoretical_loss": 3.451889637151471, "tokens_seen": 1856634880 }, { "epoch": 0.12, "learning_rate": 0.0008897510507597801, "loss": 1.3276, "theoretical_loss": 3.4518071845721168, "tokens_seen": 1857159168 }, { "epoch": 0.12, "learning_rate": 0.000889427740058196, "loss": 1.3436, "theoretical_loss": 3.4517247617818057, "tokens_seen": 1857683456 }, { "epoch": 0.12, "objective/train/advantage_avg": 0.00716676190495491, "objective/train/docs_used": 1050597, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.665181875228882, "objective/train/original_loss": 2.6651816368103027, "objective/train/theoretical_loss": 3.451642368761372, "objective/train/tokens_used": 217066976, "objective/train/value_avg": -0.0394287109375, "objective/train/value_loss": 0.007157967891544104, "objective/train/value_max": -0.0007824897766113281, "objective/train/value_min": -0.99072265625, "objective/train/value_reward_corr": 0.8217771399238453, "objective/train/value_std": 0.09637451171875, "objective/train/weight_avg": 1.0007518529891968, "objective/train/weighted_lm_loss": 2.667362689971924, "objective/train/weights_max": 1.0755515098571777, "objective/train/weights_min": 0.9072287678718567, "theoretical_loss": 3.451642368761372, "tokens_seen": 1858207744 }, { "epoch": 0.12, "learning_rate": 0.0008891044293566117, "loss": 1.3415, "theoretical_loss": 3.451642368761372, "tokens_seen": 1858207744 }, { "epoch": 0.12, "learning_rate": 0.0008887811186550275, "loss": 1.3547, "theoretical_loss": 3.4515600054916695, "tokens_seen": 1858732032 }, { "epoch": 0.12, "learning_rate": 0.0008884578079534433, "loss": 1.3218, "theoretical_loss": 3.451477671953568, "tokens_seen": 1859256320 }, { "epoch": 0.12, "learning_rate": 0.000888134497251859, "loss": 1.3387, "theoretical_loss": 3.451395368127955, "tokens_seen": 1859780608 }, { "epoch": 0.12, "objective/train/advantage_avg": -0.0010156165808439255, "objective/train/docs_used": 1051522, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.368706226348877, "objective/train/original_loss": 2.368706464767456, "objective/train/theoretical_loss": 3.4513850822379615, "objective/train/tokens_used": 218705376, "objective/train/value_avg": -0.044677734375, "objective/train/value_loss": 0.003936692140996456, "objective/train/value_max": -0.0008759498596191406, "objective/train/value_min": -0.8642578125, "objective/train/value_reward_corr": 0.8582582151744592, "objective/train/value_std": 0.0875244140625, "objective/train/weight_avg": 0.9999180436134338, "objective/train/weighted_lm_loss": 2.3694427013397217, "objective/train/weights_max": 1.0401557683944702, "objective/train/weights_min": 0.9566848874092102, "theoretical_loss": 3.4513850822379615, "tokens_seen": 1859846144 }, { "epoch": 0.12, "learning_rate": 0.0008878111865502749, "loss": 1.3805, "theoretical_loss": 3.4513130939957364, "tokens_seen": 1860304896 }, { "epoch": 0.12, "learning_rate": 0.0008874878758486906, "loss": 1.3398, "theoretical_loss": 3.4512308495378363, "tokens_seen": 1860829184 }, { "epoch": 0.12, "learning_rate": 0.0008871645651471064, "loss": 1.339, "theoretical_loss": 3.451148634735195, "tokens_seen": 1861353472 }, { "epoch": 0.12, "objective/train/advantage_avg": 0.01331527903676033, "objective/train/docs_used": 1052031, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.6123218536376953, "objective/train/original_loss": 2.612321615219116, "objective/train/theoretical_loss": 3.451128085665937, "objective/train/tokens_used": 220343776, "objective/train/value_avg": -0.0195770263671875, "objective/train/value_loss": 0.0013200418325141072, "objective/train/value_max": -0.0013942718505859375, "objective/train/value_min": -0.407958984375, "objective/train/value_reward_corr": 0.2559257005998084, "objective/train/value_std": 0.0176544189453125, "objective/train/weight_avg": 1.0013381242752075, "objective/train/weighted_lm_loss": 2.616758346557617, "objective/train/weights_max": 1.0403281450271606, "objective/train/weights_min": 0.9393684267997742, "theoretical_loss": 3.451128085665937, "tokens_seen": 1861484544 }, { "epoch": 0.12, "learning_rate": 0.0008868412544455222, "loss": 1.356, "theoretical_loss": 3.4510664495687715, "tokens_seen": 1861877760 }, { "epoch": 0.12, "learning_rate": 0.0008865179437439379, "loss": 1.364, "theoretical_loss": 3.450984294019541, "tokens_seen": 1862402048 }, { "epoch": 0.12, "learning_rate": 0.0008861946330423537, "loss": 1.372, "theoretical_loss": 3.450902168068499, "tokens_seen": 1862926336 }, { "epoch": 0.12, "objective/train/advantage_avg": 0.0067131477408111095, "objective/train/docs_used": 1053498, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.4567911624908447, "objective/train/original_loss": 2.4567906856536865, "objective/train/theoretical_loss": 3.4508713784637646, "objective/train/tokens_used": 221982176, "objective/train/value_avg": -0.040283203125, "objective/train/value_loss": 0.009684083983302116, "objective/train/value_max": -0.0012598037719726562, "objective/train/value_min": -0.9443359375, "objective/train/value_reward_corr": 0.640861671659967, "objective/train/value_std": 0.06256103515625, "objective/train/weight_avg": 1.0007189512252808, "objective/train/weighted_lm_loss": 2.458822250366211, "objective/train/weights_max": 1.0592142343521118, "objective/train/weights_min": 0.9087275862693787, "theoretical_loss": 3.4508713784637646, "tokens_seen": 1863122944 }, { "epoch": 0.12, "learning_rate": 0.0008858713223407695, "loss": 1.3643, "theoretical_loss": 3.450820071696655, "tokens_seen": 1863450624 }, { "epoch": 0.12, "learning_rate": 0.0008855480116391853, "loss": 1.3293, "theoretical_loss": 3.4507380048850385, "tokens_seen": 1863974912 }, { "epoch": 0.12, "learning_rate": 0.0008852247009376011, "loss": 1.2994, "theoretical_loss": 3.450655967614696, "tokens_seen": 1864499200 }, { "epoch": 0.12, "objective/train/advantage_avg": 0.0029104354325681925, "objective/train/docs_used": 1054143, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.701347589492798, "objective/train/original_loss": 2.701347589492798, "objective/train/theoretical_loss": 3.450614960051584, "objective/train/tokens_used": 223620576, "objective/train/value_avg": -0.034271240234375, "objective/train/value_loss": 0.007510049268603325, "objective/train/value_max": -0.0008072853088378906, "objective/train/value_min": -0.93212890625, "objective/train/value_reward_corr": 0.6342514790764773, "objective/train/value_std": 0.064208984375, "objective/train/weight_avg": 1.0003280639648438, "objective/train/weighted_lm_loss": 2.7030081748962402, "objective/train/weights_max": 1.0801401138305664, "objective/train/weights_min": 0.9115707278251648, "theoretical_loss": 3.450614960051584, "tokens_seen": 1864761344 }, { "epoch": 0.12, "learning_rate": 0.0008849013902360168, "loss": 1.3516, "theoretical_loss": 3.450573959866691, "tokens_seen": 1865023488 }, { "epoch": 0.12, "learning_rate": 0.0008845780795344325, "loss": 1.3247, "theoretical_loss": 3.450491981622105, "tokens_seen": 1865547776 }, { "epoch": 0.12, "learning_rate": 0.0008842547688328484, "loss": 1.3083, "theoretical_loss": 3.4504100328620355, "tokens_seen": 1866072064 }, { "epoch": 0.12, "objective/train/advantage_avg": 0.0017880556406453252, "objective/train/docs_used": 1055457, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.5671629905700684, "objective/train/original_loss": 2.56716251373291, "objective/train/theoretical_loss": 3.450358829851208, "objective/train/tokens_used": 225258976, "objective/train/value_avg": -0.0177154541015625, "objective/train/value_loss": 0.007107984274625778, "objective/train/value_max": -0.0008134841918945312, "objective/train/value_min": -0.77685546875, "objective/train/value_reward_corr": 0.4647070408513925, "objective/train/value_std": 0.0228118896484375, "objective/train/weight_avg": 1.000213623046875, "objective/train/weighted_lm_loss": 2.569542169570923, "objective/train/weights_max": 1.048932671546936, "objective/train/weights_min": 0.9104673266410828, "theoretical_loss": 3.450358829851208, "tokens_seen": 1866399744 }, { "epoch": 0.13, "learning_rate": 0.0008839314581312642, "loss": 1.3012, "theoretical_loss": 3.4503281135676005, "tokens_seen": 1866596352 }, { "epoch": 0.13, "learning_rate": 0.00088360814742968, "loss": 1.3078, "theoretical_loss": 3.450246223719932, "tokens_seen": 1867120640 }, { "epoch": 0.13, "learning_rate": 0.0008832848367280957, "loss": 1.3183, "theoretical_loss": 3.450164363300181, "tokens_seen": 1867644928 }, { "epoch": 0.13, "objective/train/advantage_avg": -0.01598326489329338, "objective/train/docs_used": 1056292, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.9123992919921875, "objective/train/original_loss": 2.9123992919921875, "objective/train/theoretical_loss": 3.45010298728611, "objective/train/tokens_used": 226897376, "objective/train/value_avg": -0.057708740234375, "objective/train/value_loss": 0.01774241030216217, "objective/train/value_max": -0.0016546249389648438, "objective/train/value_min": -0.9736328125, "objective/train/value_reward_corr": 0.891834592053641, "objective/train/value_std": 0.125, "objective/train/weight_avg": 0.9984889030456543, "objective/train/weighted_lm_loss": 2.9115071296691895, "objective/train/weights_max": 1.0444458723068237, "objective/train/weights_min": 0.9116736650466919, "theoretical_loss": 3.45010298728611, "tokens_seen": 1868038144 }, { "epoch": 0.13, "learning_rate": 0.0008829615260265114, "loss": 1.3751, "theoretical_loss": 3.4500825322895166, "tokens_seen": 1868169216 }, { "epoch": 0.13, "learning_rate": 0.0008826382153249273, "loss": 1.355, "theoretical_loss": 3.4500007306691236, "tokens_seen": 1868693504 }, { "epoch": 0.13, "learning_rate": 0.0008823149046233431, "loss": 1.3285, "theoretical_loss": 3.449918958420205, "tokens_seen": 1869217792 }, { "epoch": 0.13, "objective/train/advantage_avg": -0.002425486920401454, "objective/train/docs_used": 1057648, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.8826181888580322, "objective/train/original_loss": 2.882617950439453, "objective/train/theoretical_loss": 3.4498474317814245, "objective/train/tokens_used": 228535776, "objective/train/value_avg": -0.03863525390625, "objective/train/value_loss": 0.009639203548431396, "objective/train/value_max": -0.000743865966796875, "objective/train/value_min": -0.7919921875, "objective/train/value_reward_corr": 0.616253823927546, "objective/train/value_std": 0.065185546875, "objective/train/weight_avg": 0.9998046159744263, "objective/train/weighted_lm_loss": 2.8821494579315186, "objective/train/weights_max": 1.03953218460083, "objective/train/weights_min": 0.9079236388206482, "theoretical_loss": 3.4498474317814245, "tokens_seen": 1869676544 }, { "epoch": 0.13, "learning_rate": 0.0008819915939217588, "loss": 1.328, "theoretical_loss": 3.4498372155239805, "tokens_seen": 1869742080 }, { "epoch": 0.13, "learning_rate": 0.0008816682832201746, "loss": 1.3938, "theoretical_loss": 3.449755501961688, "tokens_seen": 1870266368 }, { "epoch": 0.13, "learning_rate": 0.0008813449725185903, "loss": 1.3146, "theoretical_loss": 3.449673817714582, "tokens_seen": 1870790656 }, { "epoch": 0.13, "objective/train/advantage_avg": 0.004867166746407747, "objective/train/docs_used": 1058498, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.951927423477173, "objective/train/original_loss": 2.951927423477173, "objective/train/theoretical_loss": 3.4495921627639348, "objective/train/tokens_used": 230174176, "objective/train/value_avg": -0.032928466796875, "objective/train/value_loss": 0.00767010310664773, "objective/train/value_max": -0.0015735626220703125, "objective/train/value_min": -0.943359375, "objective/train/value_reward_corr": 0.6956007740979053, "objective/train/value_std": 0.050445556640625, "objective/train/weight_avg": 1.0005244016647339, "objective/train/weighted_lm_loss": 2.9540514945983887, "objective/train/weights_max": 1.054893136024475, "objective/train/weights_min": 0.9062406420707703, "theoretical_loss": 3.4495921627639348, "tokens_seen": 1871314944 }, { "epoch": 0.13, "learning_rate": 0.0008810216618170062, "loss": 1.3555, "theoretical_loss": 3.4495921627639348, "tokens_seen": 1871314944 }, { "epoch": 0.13, "learning_rate": 0.000880698351115422, "loss": 1.3355, "theoretical_loss": 3.449510537091035, "tokens_seen": 1871839232 }, { "epoch": 0.13, "learning_rate": 0.0008803750404138377, "loss": 1.2906, "theoretical_loss": 3.4494289406771887, "tokens_seen": 1872363520 }, { "epoch": 0.13, "learning_rate": 0.0008800517297122535, "loss": 1.329, "theoretical_loss": 3.44934737350372, "tokens_seen": 1872887808 }, { "epoch": 0.13, "objective/train/advantage_avg": 0.012268186546862125, "objective/train/docs_used": 1059136, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.8644845485687256, "objective/train/original_loss": 2.8644843101501465, "objective/train/theoretical_loss": 3.449337179662071, "objective/train/tokens_used": 231812576, "objective/train/value_avg": -0.0198974609375, "objective/train/value_loss": 0.001951635000295937, "objective/train/value_max": -0.00037550926208496094, "objective/train/value_min": -0.71875, "objective/train/value_reward_corr": 0.348385468934152, "objective/train/value_std": 0.01959228515625, "objective/train/weight_avg": 1.0012364387512207, "objective/train/weighted_lm_loss": 2.8687548637390137, "objective/train/weights_max": 1.043108582496643, "objective/train/weights_min": 0.9129685759544373, "theoretical_loss": 3.449337179662071, "tokens_seen": 1872953344 }, { "epoch": 0.13, "learning_rate": 0.0008797284190106692, "loss": 1.3319, "theoretical_loss": 3.4492658355519685, "tokens_seen": 1873412096 }, { "epoch": 0.13, "learning_rate": 0.000879405108309085, "loss": 1.3193, "theoretical_loss": 3.4491843268032927, "tokens_seen": 1873936384 }, { "epoch": 0.13, "learning_rate": 0.0008790817976075009, "loss": 1.374, "theoretical_loss": 3.4491028472390672, "tokens_seen": 1874460672 }, { "epoch": 0.13, "objective/train/advantage_avg": 0.016594257205724716, "objective/train/docs_used": 1059915, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.8564324378967285, "objective/train/original_loss": 2.8564324378967285, "objective/train/theoretical_loss": 3.4490824819059, "objective/train/tokens_used": 233450976, "objective/train/value_avg": -0.024139404296875, "objective/train/value_loss": 0.0009816567180678248, "objective/train/value_max": -0.0008625984191894531, "objective/train/value_min": -0.28515625, "objective/train/value_reward_corr": 0.36110551353078024, "objective/train/value_std": 0.0207061767578125, "objective/train/weight_avg": 1.001664400100708, "objective/train/weighted_lm_loss": 2.8621654510498047, "objective/train/weights_max": 1.0224720239639282, "objective/train/weights_min": 0.9676769375801086, "theoretical_loss": 3.4490824819059, "tokens_seen": 1874591744 }, { "epoch": 0.13, "learning_rate": 0.0008787584869059166, "loss": 1.3527, "theoretical_loss": 3.4490213968406835, "tokens_seen": 1874984960 }, { "epoch": 0.13, "learning_rate": 0.0008784351762043324, "loss": 1.3695, "theoretical_loss": 3.4489399755895516, "tokens_seen": 1875509248 }, { "epoch": 0.13, "learning_rate": 0.0008781118655027481, "loss": 1.3188, "theoretical_loss": 3.4488585834670964, "tokens_seen": 1876033536 }, { "epoch": 0.13, "objective/train/advantage_avg": 0.0032856655307114124, "objective/train/docs_used": 1060624, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.3198349475860596, "objective/train/original_loss": 2.3198349475860596, "objective/train/theoretical_loss": 3.4488280689271242, "objective/train/tokens_used": 235089376, "objective/train/value_avg": -0.02642822265625, "objective/train/value_loss": 0.0024568818043917418, "objective/train/value_max": -0.0008492469787597656, "objective/train/value_min": -0.3203125, "objective/train/value_reward_corr": 0.5510320459597833, "objective/train/value_std": 0.0293731689453125, "objective/train/weight_avg": 1.0003408193588257, "objective/train/weighted_lm_loss": 2.3217856884002686, "objective/train/weights_max": 1.0254637002944946, "objective/train/weights_min": 0.9641479849815369, "theoretical_loss": 3.4488280689271242, "tokens_seen": 1876230144 }, { "epoch": 0.13, "learning_rate": 0.0008777885548011639, "loss": 1.3539, "theoretical_loss": 3.448777220454761, "tokens_seen": 1876557824 }, { "epoch": 0.13, "learning_rate": 0.0008774652440995798, "loss": 1.3719, "theoretical_loss": 3.448695886534006, "tokens_seen": 1877082112 }, { "epoch": 0.13, "learning_rate": 0.0008771419333979955, "loss": 1.3546, "theoretical_loss": 3.4486145816863085, "tokens_seen": 1877606400 }, { "epoch": 0.13, "objective/train/advantage_avg": 0.007215149234980345, "objective/train/docs_used": 1062058, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.5698108673095703, "objective/train/original_loss": 2.5698108673095703, "objective/train/theoretical_loss": 3.4485739401590725, "objective/train/tokens_used": 236727776, "objective/train/value_avg": -0.022705078125, "objective/train/value_loss": 0.00558233680203557, "objective/train/value_max": -0.001438140869140625, "objective/train/value_min": -0.66259765625, "objective/train/value_reward_corr": 0.2537790917127955, "objective/train/value_std": 0.0239410400390625, "objective/train/weight_avg": 1.0007489919662476, "objective/train/weighted_lm_loss": 2.5726351737976074, "objective/train/weights_max": 1.037023663520813, "objective/train/weights_min": 0.907504141330719, "theoretical_loss": 3.4485739401590725, "tokens_seen": 1877868544 }, { "epoch": 0.13, "learning_rate": 0.0008768186226964112, "loss": 1.332, "theoretical_loss": 3.4485333058931618, "tokens_seen": 1878130688 }, { "epoch": 0.13, "learning_rate": 0.000876495311994827, "loss": 1.3041, "theoretical_loss": 3.4484520591360774, "tokens_seen": 1878654976 }, { "epoch": 0.13, "learning_rate": 0.0008761720012932428, "loss": 1.3441, "theoretical_loss": 3.448370841396583, "tokens_seen": 1879179264 }, { "epoch": 0.13, "objective/train/advantage_avg": 0.015205965377390385, "objective/train/docs_used": 1062685, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 3.0548996925354004, "objective/train/original_loss": 3.0548999309539795, "objective/train/theoretical_loss": 3.4483200950366926, "objective/train/tokens_used": 238366176, "objective/train/value_avg": -0.022857666015625, "objective/train/value_loss": 0.0029685075860470533, "objective/train/value_max": -0.001674652099609375, "objective/train/value_min": -0.72265625, "objective/train/value_reward_corr": 0.2990940745021681, "objective/train/value_std": 0.0231170654296875, "objective/train/weight_avg": 1.0015352964401245, "objective/train/weighted_lm_loss": 3.0602428913116455, "objective/train/weights_max": 1.0588465929031372, "objective/train/weights_min": 0.917963981628418, "theoretical_loss": 3.4483200950366926, "tokens_seen": 1879506944 }, { "epoch": 0.13, "learning_rate": 0.0008758486905916587, "loss": 1.3405, "theoretical_loss": 3.448289652656223, "tokens_seen": 1879703552 }, { "epoch": 0.13, "learning_rate": 0.0008755253798900744, "loss": 1.3671, "theoretical_loss": 3.4482084928965593, "tokens_seen": 1880227840 }, { "epoch": 0.13, "learning_rate": 0.0008752020691884901, "loss": 1.3246, "theoretical_loss": 3.4481273620991697, "tokens_seen": 1880752128 }, { "epoch": 0.13, "objective/train/advantage_avg": 0.014949413947761059, "objective/train/docs_used": 1064316, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.8203611373901367, "objective/train/original_loss": 2.820361375808716, "objective/train/theoretical_loss": 3.4480665329965485, "objective/train/tokens_used": 240004576, "objective/train/value_avg": -0.038055419921875, "objective/train/value_loss": 0.006842659320682287, "objective/train/value_max": -0.0014781951904296875, "objective/train/value_min": -0.97705078125, "objective/train/value_reward_corr": 0.6452368017373056, "objective/train/value_std": 0.07891845703125, "objective/train/weight_avg": 1.0015289783477783, "objective/train/weighted_lm_loss": 2.824741840362549, "objective/train/weights_max": 1.089203119277954, "objective/train/weights_min": 0.914650022983551, "theoretical_loss": 3.4480665329965485, "tokens_seen": 1881145344 }, { "epoch": 0.13, "learning_rate": 0.0008748787584869059, "loss": 1.3561, "theoretical_loss": 3.4480462602456505, "tokens_seen": 1881276416 }, { "epoch": 0.13, "learning_rate": 0.0008745554477853217, "loss": 1.337, "theoretical_loss": 3.447965187317614, "tokens_seen": 1881800704 }, { "epoch": 0.13, "learning_rate": 0.0008742321370837376, "loss": 1.3289, "theoretical_loss": 3.4478841432966876, "tokens_seen": 1882324992 }, { "epoch": 0.13, "objective/train/advantage_avg": 0.012089555151760578, "objective/train/docs_used": 1064917, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.566396713256836, "objective/train/original_loss": 2.566396474838257, "objective/train/theoretical_loss": 3.447813253476813, "objective/train/tokens_used": 241642976, "objective/train/value_avg": -0.0191192626953125, "objective/train/value_loss": 0.0005802254308946431, "objective/train/value_max": -0.0011157989501953125, "objective/train/value_min": -0.1783447265625, "objective/train/value_reward_corr": 0.44890729857114287, "objective/train/value_std": 0.01517486572265625, "objective/train/weight_avg": 1.0012118816375732, "objective/train/weighted_lm_loss": 2.570420026779175, "objective/train/weights_max": 1.0177065134048462, "objective/train/weights_min": 0.971094012260437, "theoretical_loss": 3.447813253476813, "tokens_seen": 1882783744 }, { "epoch": 0.14, "learning_rate": 0.0008739088263821533, "loss": 1.2838, "theoretical_loss": 3.4478031281645185, "tokens_seen": 1882849280 }, { "epoch": 0.14, "learning_rate": 0.0008735855156805691, "loss": 1.3653, "theoretical_loss": 3.447722141902769, "tokens_seen": 1883373568 }, { "epoch": 0.14, "learning_rate": 0.0008732622049789848, "loss": 1.3384, "theoretical_loss": 3.4476411844931176, "tokens_seen": 1883897856 }, { "epoch": 0.14, "objective/train/advantage_avg": 0.012461572885513306, "objective/train/docs_used": 1065907, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.4985668659210205, "objective/train/original_loss": 2.4985666275024414, "objective/train/theoretical_loss": 3.447560255917261, "objective/train/tokens_used": 243281376, "objective/train/value_avg": -0.0250244140625, "objective/train/value_loss": 0.003816446056589484, "objective/train/value_max": -0.0018100738525390625, "objective/train/value_min": -0.9736328125, "objective/train/value_reward_corr": 0.5166515653497066, "objective/train/value_std": 0.035552978515625, "objective/train/weight_avg": 1.0012648105621338, "objective/train/weighted_lm_loss": 2.5018861293792725, "objective/train/weights_max": 1.0521676540374756, "objective/train/weights_min": 0.9106810092926025, "theoretical_loss": 3.447560255917261, "tokens_seen": 1884422144 }, { "epoch": 0.14, "learning_rate": 0.0008729388942774006, "loss": 1.3079, "theoretical_loss": 3.447560255917261, "tokens_seen": 1884422144 }, { "epoch": 0.14, "learning_rate": 0.0008726155835758163, "loss": 1.3245, "theoretical_loss": 3.4474793561569106, "tokens_seen": 1884946432 }, { "epoch": 0.14, "learning_rate": 0.0008722922728742322, "loss": 1.36, "theoretical_loss": 3.4473984851937973, "tokens_seen": 1885470720 }, { "epoch": 0.14, "learning_rate": 0.000871968962172648, "loss": 1.3031, "theoretical_loss": 3.4473176430096664, "tokens_seen": 1885995008 }, { "epoch": 0.14, "objective/train/advantage_avg": 0.002817000960931182, "objective/train/docs_used": 1066425, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.6394083499908447, "objective/train/original_loss": 2.6394083499908447, "objective/train/theoretical_loss": 3.4473075397592634, "objective/train/tokens_used": 244919776, "objective/train/value_avg": -0.032196044921875, "objective/train/value_loss": 0.009916171431541443, "objective/train/value_max": -0.0013723373413085938, "objective/train/value_min": -0.73046875, "objective/train/value_reward_corr": 0.519412826012051, "objective/train/value_std": 0.03692626953125, "objective/train/weight_avg": 1.0003303289413452, "objective/train/weighted_lm_loss": 2.640183448791504, "objective/train/weights_max": 1.0408198833465576, "objective/train/weights_min": 0.9098087549209595, "theoretical_loss": 3.4473075397592634, "tokens_seen": 1886060544 }, { "epoch": 0.14, "learning_rate": 0.0008716456514710637, "loss": 1.3172, "theoretical_loss": 3.44723682958628, "tokens_seen": 1886519296 }, { "epoch": 0.14, "learning_rate": 0.0008713223407694795, "loss": 1.3185, "theoretical_loss": 3.4471560449054186, "tokens_seen": 1887043584 }, { "epoch": 0.14, "learning_rate": 0.0008709990300678952, "loss": 1.2979, "theoretical_loss": 3.4470752889488763, "tokens_seen": 1887567872 }, { "epoch": 0.14, "objective/train/advantage_avg": 0.007560465019196272, "objective/train/docs_used": 1067085, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.5917818546295166, "objective/train/original_loss": 2.5917820930480957, "objective/train/theoretical_loss": 3.4470551044457842, "objective/train/tokens_used": 246558176, "objective/train/value_avg": -0.0345458984375, "objective/train/value_loss": 0.004109109286218882, "objective/train/value_max": -0.0009508132934570312, "objective/train/value_min": -0.857421875, "objective/train/value_reward_corr": 0.7438986841182647, "objective/train/value_std": 0.05181884765625, "objective/train/weight_avg": 1.0007764101028442, "objective/train/weighted_lm_loss": 2.5935306549072266, "objective/train/weights_max": 1.0544670820236206, "objective/train/weights_min": 0.9114324450492859, "theoretical_loss": 3.4470551044457842, "tokens_seen": 1887698944 }, { "epoch": 0.14, "learning_rate": 0.0008706757193663111, "loss": 1.323, "theoretical_loss": 3.4469945616984674, "tokens_seen": 1888092160 }, { "epoch": 0.14, "learning_rate": 0.0008703524086647269, "loss": 1.3268, "theoretical_loss": 3.446913863136019, "tokens_seen": 1888616448 }, { "epoch": 0.14, "learning_rate": 0.0008700290979631425, "loss": 1.3136, "theoretical_loss": 3.4468331932433784, "tokens_seen": 1889140736 }, { "epoch": 0.14, "objective/train/advantage_avg": 0.0030048706103116274, "objective/train/docs_used": 1068281, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.4557034969329834, "objective/train/original_loss": 2.4557032585144043, "objective/train/theoretical_loss": 3.4468029494213703, "objective/train/tokens_used": 248196576, "objective/train/value_avg": -0.0233917236328125, "objective/train/value_loss": 0.00563434511423111, "objective/train/value_max": -0.0007948875427246094, "objective/train/value_min": -0.9580078125, "objective/train/value_reward_corr": 0.3951451630660363, "objective/train/value_std": 0.0269927978515625, "objective/train/weight_avg": 1.0003281831741333, "objective/train/weighted_lm_loss": 2.457690954208374, "objective/train/weights_max": 1.0802568197250366, "objective/train/weights_min": 0.91530442237854, "theoretical_loss": 3.4468029494213703, "tokens_seen": 1889337344 }, { "epoch": 0.14, "learning_rate": 0.0008697057872615584, "loss": 1.2732, "theoretical_loss": 3.4467525520024065, "tokens_seen": 1889665024 }, { "epoch": 0.14, "learning_rate": 0.0008693824765599741, "loss": 1.3105, "theoretical_loss": 3.4466719393949825, "tokens_seen": 1890189312 }, { "epoch": 0.14, "learning_rate": 0.00086905916585839, "loss": 1.3128, "theoretical_loss": 3.446591355403001, "tokens_seen": 1890713600 }, { "epoch": 0.14, "objective/train/advantage_avg": -0.03140066936612129, "objective/train/docs_used": 1069021, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.67916202545166, "objective/train/original_loss": 2.6791622638702393, "objective/train/theoretical_loss": 3.4465510741321483, "objective/train/tokens_used": 249834976, "objective/train/value_avg": -0.0204315185546875, "objective/train/value_loss": 0.035787660628557205, "objective/train/value_max": -0.0011835098266601562, "objective/train/value_min": -0.62890625, "objective/train/value_reward_corr": -0.06081425215824218, "objective/train/value_std": 0.015533447265625, "objective/train/weight_avg": 0.9970345497131348, "objective/train/weighted_lm_loss": 2.6806206703186035, "objective/train/weights_max": 1.0569566488265991, "objective/train/weights_min": 0.9265571236610413, "theoretical_loss": 3.4465510741321483, "tokens_seen": 1890975744 }, { "epoch": 0.14, "learning_rate": 0.0008687358551568058, "loss": 1.2797, "theoretical_loss": 3.446510800008374, "tokens_seen": 1891237888 }, { "epoch": 0.14, "learning_rate": 0.0008684125444552214, "loss": 1.3333, "theoretical_loss": 3.446430273193029, "tokens_seen": 1891762176 }, { "epoch": 0.14, "learning_rate": 0.0008680892337536373, "loss": 1.337, "theoretical_loss": 3.4463497749389105, "tokens_seen": 1892286464 }, { "epoch": 0.14, "objective/train/advantage_avg": 0.007914035581052303, "objective/train/docs_used": 1070317, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.5645289421081543, "objective/train/original_loss": 2.5645291805267334, "objective/train/theoretical_loss": 3.446299478025818, "objective/train/tokens_used": 251473376, "objective/train/value_avg": -0.0215301513671875, "objective/train/value_loss": 0.002426565159112215, "objective/train/value_max": -0.0004916191101074219, "objective/train/value_min": -0.2152099609375, "objective/train/value_reward_corr": 0.3358534411026787, "objective/train/value_std": 0.0156402587890625, "objective/train/weight_avg": 1.0008034706115723, "objective/train/weighted_lm_loss": 2.5673627853393555, "objective/train/weights_max": 1.0157667398452759, "objective/train/weights_min": 0.9315345287322998, "theoretical_loss": 3.446299478025818, "tokens_seen": 1892614144 }, { "epoch": 0.14, "learning_rate": 0.000867765923052053, "loss": 1.3263, "theoretical_loss": 3.446269305227979, "tokens_seen": 1892810752 }, { "epoch": 0.14, "learning_rate": 0.0008674426123504688, "loss": 1.313, "theoretical_loss": 3.4461888640422123, "tokens_seen": 1893335040 }, { "epoch": 0.14, "learning_rate": 0.0008671193016488847, "loss": 1.3043, "theoretical_loss": 3.446108451363603, "tokens_seen": 1893859328 }, { "epoch": 0.14, "objective/train/advantage_avg": 0.0022732431534677744, "objective/train/docs_used": 1070823, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.759701728820801, "objective/train/original_loss": 2.75970196723938, "objective/train/theoretical_loss": 3.4460481605516464, "objective/train/tokens_used": 253111776, "objective/train/value_avg": -0.02557373046875, "objective/train/value_loss": 0.0038690969813615084, "objective/train/value_max": -0.001361846923828125, "objective/train/value_min": -0.8232421875, "objective/train/value_reward_corr": 0.5180750919773639, "objective/train/value_std": 0.026947021484375, "objective/train/weight_avg": 1.000246524810791, "objective/train/weighted_lm_loss": 2.7615773677825928, "objective/train/weights_max": 1.0313485860824585, "objective/train/weights_min": 0.9138754606246948, "theoretical_loss": 3.4460481605516464, "tokens_seen": 1894252544 }, { "epoch": 0.14, "learning_rate": 0.0008667959909473003, "loss": 1.2891, "theoretical_loss": 3.4460280671741623, "tokens_seen": 1894383616 }, { "epoch": 0.14, "learning_rate": 0.0008664726802457162, "loss": 1.3133, "theoretical_loss": 3.4459477114559154, "tokens_seen": 1894907904 }, { "epoch": 0.14, "learning_rate": 0.0008661493695441319, "loss": 1.3416, "theoretical_loss": 3.445867384190905, "tokens_seen": 1895432192 }, { "epoch": 0.14, "objective/train/advantage_avg": 0.0205168928951025, "objective/train/docs_used": 1071554, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.75091552734375, "objective/train/original_loss": 2.750915765762329, "objective/train/theoretical_loss": 3.445797121160462, "objective/train/tokens_used": 254750176, "objective/train/value_avg": -0.039031982421875, "objective/train/value_loss": 0.003261704696342349, "objective/train/value_max": -0.0010528564453125, "objective/train/value_min": -0.96142578125, "objective/train/value_reward_corr": 0.5305994681047989, "objective/train/value_std": 0.048126220703125, "objective/train/weight_avg": 1.0020679235458374, "objective/train/weighted_lm_loss": 2.7576467990875244, "objective/train/weights_max": 1.0606775283813477, "objective/train/weights_min": 0.9088544845581055, "theoretical_loss": 3.445797121160462, "tokens_seen": 1895890944 }, { "epoch": 0.14, "learning_rate": 0.0008658260588425477, "loss": 1.314, "theoretical_loss": 3.4457870853611903, "tokens_seen": 1895956480 }, { "epoch": 0.14, "learning_rate": 0.0008655027481409635, "loss": 1.2827, "theoretical_loss": 3.4457068149488457, "tokens_seen": 1896480768 }, { "epoch": 0.14, "learning_rate": 0.0008651794374393792, "loss": 1.2892, "theoretical_loss": 3.4456265729359634, "tokens_seen": 1897005056 }, { "epoch": 0.14, "objective/train/advantage_avg": 0.016756266355514526, "objective/train/docs_used": 1072893, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.5654804706573486, "objective/train/original_loss": 2.5654807090759277, "objective/train/theoretical_loss": 3.44554635930465, "objective/train/tokens_used": 256388576, "objective/train/value_avg": -0.0230560302734375, "objective/train/value_loss": 0.0010730517096817493, "objective/train/value_max": -0.00047278404235839844, "objective/train/value_min": -0.55126953125, "objective/train/value_reward_corr": 0.2840080716056042, "objective/train/value_std": 0.023712158203125, "objective/train/weight_avg": 1.0016810894012451, "objective/train/weighted_lm_loss": 2.570277690887451, "objective/train/weights_max": 1.0546727180480957, "objective/train/weights_min": 0.9633215665817261, "theoretical_loss": 3.44554635930465, "tokens_seen": 1897529344 }, { "epoch": 0.14, "learning_rate": 0.0008648561267377951, "loss": 1.2913, "theoretical_loss": 3.44554635930465, "tokens_seen": 1897529344 }, { "epoch": 0.14, "learning_rate": 0.0008645328160362108, "loss": 1.3239, "theoretical_loss": 3.44546617403703, "tokens_seen": 1898053632 }, { "epoch": 0.14, "learning_rate": 0.0008642095053346266, "loss": 1.3567, "theoretical_loss": 3.4453860171152426, "tokens_seen": 1898577920 }, { "epoch": 0.14, "learning_rate": 0.0008638861946330424, "loss": 1.3253, "theoretical_loss": 3.445305888521445, "tokens_seen": 1899102208 }, { "epoch": 0.14, "objective/train/advantage_avg": 0.011736485175788403, "objective/train/docs_used": 1073632, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.4856345653533936, "objective/train/original_loss": 2.4856345653533936, "objective/train/theoretical_loss": 3.445295874438144, "objective/train/tokens_used": 258026976, "objective/train/value_avg": -0.0138702392578125, "objective/train/value_loss": 0.00023078186495695263, "objective/train/value_max": -0.0007319450378417969, "objective/train/value_min": -0.12408447265625, "objective/train/value_reward_corr": 0.0537496063397639, "objective/train/value_std": 0.00913238525390625, "objective/train/weight_avg": 1.0011749267578125, "objective/train/weighted_lm_loss": 2.4894087314605713, "objective/train/weights_max": 1.0122429132461548, "objective/train/weights_min": 0.9951488971710205, "theoretical_loss": 3.445295874438144, "tokens_seen": 1899167744 }, { "epoch": 0.15, "learning_rate": 0.0008635628839314581, "loss": 1.336, "theoretical_loss": 3.4452257882378086, "tokens_seen": 1899626496 }, { "epoch": 0.15, "learning_rate": 0.0008632395732298739, "loss": 1.3187, "theoretical_loss": 3.4451457162465227, "tokens_seen": 1900150784 }, { "epoch": 0.15, "learning_rate": 0.0008629162625282897, "loss": 1.3231, "theoretical_loss": 3.4450656725297906, "tokens_seen": 1900675072 }, { "epoch": 0.15, "objective/train/advantage_avg": 0.009564480744302273, "objective/train/docs_used": 1075027, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.9195103645324707, "objective/train/original_loss": 2.9195103645324707, "objective/train/theoretical_loss": 3.445045666016423, "objective/train/tokens_used": 259665376, "objective/train/value_avg": -0.0227203369140625, "objective/train/value_loss": 0.0019679460674524307, "objective/train/value_max": -0.000865936279296875, "objective/train/value_min": -0.3720703125, "objective/train/value_reward_corr": 0.5921571166967114, "objective/train/value_std": 0.0280914306640625, "objective/train/weight_avg": 1.000966191291809, "objective/train/weighted_lm_loss": 2.923151969909668, "objective/train/weights_max": 1.0229593515396118, "objective/train/weights_min": 0.9582697153091431, "theoretical_loss": 3.445045666016423, "tokens_seen": 1900806144 }, { "epoch": 0.15, "learning_rate": 0.0008625929518267055, "loss": 1.3036, "theoretical_loss": 3.444985657069834, "tokens_seen": 1901199360 }, { "epoch": 0.15, "learning_rate": 0.0008622696411251213, "loss": 1.333, "theoretical_loss": 3.444905669848889, "tokens_seen": 1901723648 }, { "epoch": 0.15, "learning_rate": 0.000861946330423537, "loss": 1.3738, "theoretical_loss": 3.444825710849209, "tokens_seen": 1902247936 }, { "epoch": 0.15, "objective/train/advantage_avg": 0.01713624969124794, "objective/train/docs_used": 1075669, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.7257239818573, "objective/train/original_loss": 2.7257237434387207, "objective/train/theoretical_loss": 3.4447957334965045, "objective/train/tokens_used": 261303776, "objective/train/value_avg": -0.021697998046875, "objective/train/value_loss": 0.002551504410803318, "objective/train/value_max": -0.0020427703857421875, "objective/train/value_min": -0.927734375, "objective/train/value_reward_corr": 0.21787570415517465, "objective/train/value_std": 0.020111083984375, "objective/train/weight_avg": 1.0017261505126953, "objective/train/weighted_lm_loss": 2.731285333633423, "objective/train/weights_max": 1.0487592220306396, "objective/train/weights_min": 0.9146391749382019, "theoretical_loss": 3.4447957334965045, "tokens_seen": 1902444544 }, { "epoch": 0.15, "learning_rate": 0.0008616230197219527, "loss": 1.3908, "theoretical_loss": 3.4447457800530623, "tokens_seen": 1902772224 }, { "epoch": 0.15, "learning_rate": 0.0008612997090203686, "loss": 1.3577, "theoretical_loss": 3.444665877442734, "tokens_seen": 1903296512 }, { "epoch": 0.15, "learning_rate": 0.0008609763983187844, "loss": 1.4038, "theoretical_loss": 3.4445860030005253, "tokens_seen": 1903820800 }, { "epoch": 0.15, "objective/train/advantage_avg": 0.015912052243947983, "objective/train/docs_used": 1076839, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.4968268871307373, "objective/train/original_loss": 2.496826410293579, "objective/train/theoretical_loss": 3.444546076336939, "objective/train/tokens_used": 262942176, "objective/train/value_avg": -0.022186279296875, "objective/train/value_loss": 0.0008444814011454582, "objective/train/value_max": -0.0007948875427246094, "objective/train/value_min": -0.16259765625, "objective/train/value_reward_corr": 0.1909227784462895, "objective/train/value_std": 0.015838623046875, "objective/train/weight_avg": 1.001595377922058, "objective/train/weighted_lm_loss": 2.5016324520111084, "objective/train/weights_max": 1.0143392086029053, "objective/train/weights_min": 0.9609971046447754, "theoretical_loss": 3.444546076336939, "tokens_seen": 1904082944 }, { "epoch": 0.15, "learning_rate": 0.0008606530876172001, "loss": 1.4438, "theoretical_loss": 3.4445061567087523, "tokens_seen": 1904345088 }, { "epoch": 0.15, "learning_rate": 0.0008603297769156159, "loss": 1.4606, "theoretical_loss": 3.4444263385497482, "tokens_seen": 1904869376 }, { "epoch": 0.15, "learning_rate": 0.0008600064662140316, "loss": 1.6166, "theoretical_loss": 3.4443465485058615, "tokens_seen": 1905393664 }, { "epoch": 0.15, "objective/train/advantage_avg": 0.008998468518257141, "objective/train/docs_used": 1077487, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 3.6149179935455322, "objective/train/original_loss": 3.6149179935455322, "objective/train/theoretical_loss": 3.4442966939978037, "objective/train/tokens_used": 264580576, "objective/train/value_avg": -0.0221099853515625, "objective/train/value_loss": 0.0019522274378687143, "objective/train/value_max": -0.0017414093017578125, "objective/train/value_min": -0.32763671875, "objective/train/value_reward_corr": 0.5228837397386478, "objective/train/value_std": 0.0198974609375, "objective/train/weight_avg": 1.0009095668792725, "objective/train/weighted_lm_loss": 3.618626594543457, "objective/train/weights_max": 1.0215109586715698, "objective/train/weights_min": 0.9549351930618286, "theoretical_loss": 3.4442966939978037, "tokens_seen": 1905721344 }, { "epoch": 0.15, "learning_rate": 0.0008596831555124475, "loss": 1.9009, "theoretical_loss": 3.444266786559457, "tokens_seen": 1905917952 }, { "epoch": 0.15, "learning_rate": 0.0008593598448108633, "loss": 1.5419, "theoretical_loss": 3.4441870526929153, "tokens_seen": 1906442240 }, { "epoch": 0.15, "learning_rate": 0.000859036534109279, "loss": 1.6175, "theoretical_loss": 3.444107346888633, "tokens_seen": 1906966528 }, { "epoch": 0.15, "objective/train/advantage_avg": 0.011119750328361988, "objective/train/docs_used": 1078017, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 3.2529163360595703, "objective/train/original_loss": 3.2529163360595703, "objective/train/theoretical_loss": 3.4440475859406985, "objective/train/tokens_used": 266218976, "objective/train/value_avg": -0.020538330078125, "objective/train/value_loss": 0.0017749095568433404, "objective/train/value_max": -0.001239776611328125, "objective/train/value_min": -0.58935546875, "objective/train/value_reward_corr": 0.5488605565907839, "objective/train/value_std": 0.022918701171875, "objective/train/weight_avg": 1.0011208057403564, "objective/train/weighted_lm_loss": 3.25726580619812, "objective/train/weights_max": 1.025789499282837, "objective/train/weights_min": 0.9353953003883362, "theoretical_loss": 3.4440475859406985, "tokens_seen": 1907359744 }, { "epoch": 0.15, "learning_rate": 0.0008587132234076949, "loss": 1.6477, "theoretical_loss": 3.4440276691290226, "tokens_seen": 1907490816 }, { "epoch": 0.15, "learning_rate": 0.0008583899127061105, "loss": 1.6212, "theoretical_loss": 3.4439480193965117, "tokens_seen": 1908015104 }, { "epoch": 0.15, "learning_rate": 0.0008580666020045263, "loss": 1.5461, "theoretical_loss": 3.443868397673545, "tokens_seen": 1908539392 }, { "epoch": 0.15, "objective/train/advantage_avg": 0.005889748223125935, "objective/train/docs_used": 1079511, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 3.094499349594116, "objective/train/original_loss": 3.094499349594116, "objective/train/theoretical_loss": 3.443798751628739, "objective/train/tokens_used": 267857376, "objective/train/value_avg": -0.018402099609375, "objective/train/value_loss": 0.0028699361719191074, "objective/train/value_max": -0.0009183883666992188, "objective/train/value_min": -0.2021484375, "objective/train/value_reward_corr": 0.24935163255089354, "objective/train/value_std": 0.0160980224609375, "objective/train/weight_avg": 1.0006030797958374, "objective/train/weighted_lm_loss": 3.096608877182007, "objective/train/weights_max": 1.0173702239990234, "objective/train/weights_min": 0.9268484115600586, "theoretical_loss": 3.443798751628739, "tokens_seen": 1908998144 }, { "epoch": 0.15, "learning_rate": 0.0008577432913029422, "loss": 1.5376, "theoretical_loss": 3.443788803942582, "tokens_seen": 1909063680 }, { "epoch": 0.15, "learning_rate": 0.0008574199806013579, "loss": 1.5129, "theoretical_loss": 3.443709238186098, "tokens_seen": 1909587968 }, { "epoch": 0.15, "learning_rate": 0.0008570966698997738, "loss": 1.4896, "theoretical_loss": 3.4436297003865852, "tokens_seen": 1910112256 }, { "epoch": 0.15, "objective/train/advantage_avg": 0.01912420429289341, "objective/train/docs_used": 1080108, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.962597131729126, "objective/train/original_loss": 2.9625966548919678, "objective/train/theoretical_loss": 3.44355019052655, "objective/train/tokens_used": 269495776, "objective/train/value_avg": -0.03155517578125, "objective/train/value_loss": 0.006183961872011423, "objective/train/value_max": -0.001506805419921875, "objective/train/value_min": -0.60595703125, "objective/train/value_reward_corr": 0.20350206117960942, "objective/train/value_std": 0.046600341796875, "objective/train/weight_avg": 1.0019428730010986, "objective/train/weighted_lm_loss": 2.9673008918762207, "objective/train/weights_max": 1.041297435760498, "objective/train/weights_min": 0.9090885519981384, "theoretical_loss": 3.44355019052655, "tokens_seen": 1910636544 }, { "epoch": 0.15, "learning_rate": 0.0008567733591981894, "loss": 1.4851, "theoretical_loss": 3.44355019052655, "tokens_seen": 1910636544 }, { "epoch": 0.15, "learning_rate": 0.0008564500484966052, "loss": 1.5155, "theoretical_loss": 3.4434707085885163, "tokens_seen": 1911160832 }, { "epoch": 0.15, "learning_rate": 0.0008561267377950211, "loss": 1.5175, "theoretical_loss": 3.4433912545550216, "tokens_seen": 1911685120 }, { "epoch": 0.15, "learning_rate": 0.0008558034270934368, "loss": 1.4985, "theoretical_loss": 3.443311828408621, "tokens_seen": 1912209408 }, { "epoch": 0.15, "objective/train/advantage_avg": 0.019957559183239937, "objective/train/docs_used": 1081319, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.717841863632202, "objective/train/original_loss": 2.7178421020507812, "objective/train/theoretical_loss": 3.4433019021002638, "objective/train/tokens_used": 271134176, "objective/train/value_avg": -0.03466796875, "objective/train/value_loss": 0.0034972738940268755, "objective/train/value_max": -0.0025615692138671875, "objective/train/value_min": -0.48388671875, "objective/train/value_reward_corr": 0.2681348242024209, "objective/train/value_std": 0.035186767578125, "objective/train/weight_avg": 1.002013087272644, "objective/train/weighted_lm_loss": 2.7250165939331055, "objective/train/weights_max": 1.0469341278076172, "objective/train/weights_min": 0.9223465919494629, "theoretical_loss": 3.4433019021002638, "tokens_seen": 1912274944 }, { "epoch": 0.15, "learning_rate": 0.0008554801163918527, "loss": 1.4967, "theoretical_loss": 3.443232430131884, "tokens_seen": 1912733696 }, { "epoch": 0.15, "learning_rate": 0.0008551568056902683, "loss": 1.4896, "theoretical_loss": 3.443153059707397, "tokens_seen": 1913257984 }, { "epoch": 0.15, "learning_rate": 0.0008548334949886841, "loss": 1.4457, "theoretical_loss": 3.4430737171177612, "tokens_seen": 1913782272 }, { "epoch": 0.15, "objective/train/advantage_avg": 0.010879095643758774, "objective/train/docs_used": 1081978, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.6628224849700928, "objective/train/original_loss": 2.6628222465515137, "objective/train/theoretical_loss": 3.4430538858175104, "objective/train/tokens_used": 272772576, "objective/train/value_avg": -0.02215576171875, "objective/train/value_loss": 0.0012795020593330264, "objective/train/value_max": -0.002010345458984375, "objective/train/value_min": -0.6787109375, "objective/train/value_reward_corr": 0.2563514833318438, "objective/train/value_std": 0.01544952392578125, "objective/train/weight_avg": 1.0010942220687866, "objective/train/weighted_lm_loss": 2.666422128677368, "objective/train/weights_max": 1.0634685754776, "objective/train/weights_min": 0.910868227481842, "theoretical_loss": 3.4430538858175104, "tokens_seen": 1913913344 }, { "epoch": 0.15, "learning_rate": 0.0008545101842871, "loss": 1.4111, "theoretical_loss": 3.4429944023455934, "tokens_seen": 1914306560 }, { "epoch": 0.15, "learning_rate": 0.0008541868735855157, "loss": 1.3876, "theoretical_loss": 3.442915115373526, "tokens_seen": 1914830848 }, { "epoch": 0.15, "learning_rate": 0.0008538635628839315, "loss": 1.4156, "theoretical_loss": 3.4428358561842076, "tokens_seen": 1915355136 }, { "epoch": 0.15, "objective/train/advantage_avg": 0.0035302406176924706, "objective/train/docs_used": 1083212, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.7173891067504883, "objective/train/original_loss": 2.7173898220062256, "objective/train/theoretical_loss": 3.442806141147414, "objective/train/tokens_used": 274410976, "objective/train/value_avg": -0.023529052734375, "objective/train/value_loss": 0.0028884343337267637, "objective/train/value_max": -0.0014162063598632812, "objective/train/value_min": -0.85595703125, "objective/train/value_reward_corr": 0.4354067913895963, "objective/train/value_std": 0.029388427734375, "objective/train/weight_avg": 1.000367283821106, "objective/train/weighted_lm_loss": 2.719200611114502, "objective/train/weights_max": 1.0451194047927856, "objective/train/weights_min": 0.9089704751968384, "theoretical_loss": 3.442806141147414, "tokens_seen": 1915551744 }, { "epoch": 0.16, "learning_rate": 0.0008535402521823472, "loss": 1.4466, "theoretical_loss": 3.4427566247603014, "tokens_seen": 1915879424 }, { "epoch": 0.16, "learning_rate": 0.000853216941480763, "loss": 1.3793, "theoretical_loss": 3.442677421084487, "tokens_seen": 1916403712 }, { "epoch": 0.16, "learning_rate": 0.0008528936307791789, "loss": 1.4228, "theoretical_loss": 3.44259824513946, "tokens_seen": 1916928000 }, { "epoch": 0.16, "objective/train/advantage_avg": 0.013139400631189346, "objective/train/docs_used": 1083753, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 3.2705440521240234, "objective/train/original_loss": 3.2705438137054443, "objective/train/theoretical_loss": 3.4425586675605877, "objective/train/tokens_used": 276049376, "objective/train/value_avg": -0.02093505859375, "objective/train/value_loss": 0.0026619986165314913, "objective/train/value_max": -0.0012598037719726562, "objective/train/value_min": -0.76416015625, "objective/train/value_reward_corr": 0.33373771373852784, "objective/train/value_std": 0.024200439453125, "objective/train/weight_avg": 1.0013271570205688, "objective/train/weighted_lm_loss": 3.2755701541900635, "objective/train/weights_max": 1.0515906810760498, "objective/train/weights_min": 0.9421148300170898, "theoretical_loss": 3.4425586675605877, "tokens_seen": 1917190144 }, { "epoch": 0.16, "learning_rate": 0.0008525703200775946, "loss": 1.3594, "theoretical_loss": 3.4425190969079296, "tokens_seen": 1917452288 }, { "epoch": 0.16, "learning_rate": 0.0008522470093760104, "loss": 1.375, "theoretical_loss": 3.442439976372623, "tokens_seen": 1917976576 }, { "epoch": 0.16, "learning_rate": 0.0008519236986744261, "loss": 1.4014, "theoretical_loss": 3.4423608835162804, "tokens_seen": 1918500864 }, { "epoch": 0.16, "objective/train/advantage_avg": 0.006796377245336771, "objective/train/docs_used": 1084888, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.557621479034424, "objective/train/original_loss": 2.557621717453003, "objective/train/theoretical_loss": 3.4423114645291277, "objective/train/tokens_used": 277687776, "objective/train/value_avg": -0.0173492431640625, "objective/train/value_loss": 0.0026167731266468763, "objective/train/value_max": -0.0016107559204101562, "objective/train/value_min": -0.322265625, "objective/train/value_reward_corr": 0.2281187689345156, "objective/train/value_std": 0.01186370849609375, "objective/train/weight_avg": 1.0006924867630005, "objective/train/weighted_lm_loss": 2.560102939605713, "objective/train/weights_max": 1.0178049802780151, "objective/train/weights_min": 0.9209941029548645, "theoretical_loss": 3.4423114645291277, "tokens_seen": 1918828544 }, { "epoch": 0.16, "learning_rate": 0.0008516003879728419, "loss": 1.4102, "theoretical_loss": 3.442281818321659, "tokens_seen": 1919025152 }, { "epoch": 0.16, "learning_rate": 0.0008512770772712576, "loss": 1.4105, "theoretical_loss": 3.442202780771531, "tokens_seen": 1919549440 }, { "epoch": 0.16, "learning_rate": 0.0008509537665696735, "loss": 1.3964, "theoretical_loss": 3.442123770848685, "tokens_seen": 1920073728 }, { "epoch": 0.16, "objective/train/advantage_avg": 0.013114315457642078, "objective/train/docs_used": 1085471, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.897791624069214, "objective/train/original_loss": 2.897792100906372, "objective/train/theoretical_loss": 3.442064531526608, "objective/train/tokens_used": 279326176, "objective/train/value_avg": -0.01885986328125, "objective/train/value_loss": 0.0011748933466151357, "objective/train/value_max": -0.0015306472778320312, "objective/train/value_min": -0.55859375, "objective/train/value_reward_corr": 0.2596282766594812, "objective/train/value_std": 0.0167083740234375, "objective/train/weight_avg": 1.0013171434402466, "objective/train/weighted_lm_loss": 2.9020559787750244, "objective/train/weights_max": 1.0573519468307495, "objective/train/weights_min": 0.9363995790481567, "theoretical_loss": 3.442064531526608, "tokens_seen": 1920466944 }, { "epoch": 0.16, "learning_rate": 0.0008506304558680893, "loss": 1.3729, "theoretical_loss": 3.442044788535923, "tokens_seen": 1920598016 }, { "epoch": 0.16, "learning_rate": 0.000850307145166505, "loss": 1.4016, "theoretical_loss": 3.441965833816064, "tokens_seen": 1921122304 }, { "epoch": 0.16, "learning_rate": 0.0008499838344649208, "loss": 1.4247, "theoretical_loss": 3.4418869066719417, "tokens_seen": 1921646592 }, { "epoch": 0.16, "objective/train/advantage_avg": 0.008777748793363571, "objective/train/docs_used": 1086847, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.994741916656494, "objective/train/original_loss": 2.9947423934936523, "objective/train/theoretical_loss": 3.4418178680280738, "objective/train/tokens_used": 280964576, "objective/train/value_avg": -0.0182647705078125, "objective/train/value_loss": 0.0015315611381083727, "objective/train/value_max": -0.0011692047119140625, "objective/train/value_min": -0.1729736328125, "objective/train/value_reward_corr": 0.3453108363658259, "objective/train/value_std": 0.014068603515625, "objective/train/weight_avg": 1.000885248184204, "objective/train/weighted_lm_loss": 2.9978699684143066, "objective/train/weights_max": 1.0131057500839233, "objective/train/weights_min": 0.9433047771453857, "theoretical_loss": 3.4418178680280738, "tokens_seen": 1922105344 }, { "epoch": 0.16, "learning_rate": 0.0008496605237633365, "loss": 1.3454, "theoretical_loss": 3.441808007086406, "tokens_seen": 1922170880 }, { "epoch": 0.16, "learning_rate": 0.0008493372130617524, "loss": 1.3876, "theoretical_loss": 3.441729135042321, "tokens_seen": 1922695168 }, { "epoch": 0.16, "learning_rate": 0.0008490139023601682, "loss": 1.3831, "theoretical_loss": 3.4416502905225665, "tokens_seen": 1923219456 }, { "epoch": 0.16, "objective/train/advantage_avg": 0.01067157182842493, "objective/train/docs_used": 1087440, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 3.02713942527771, "objective/train/original_loss": 3.027139186859131, "objective/train/theoretical_loss": 3.441571473510038, "objective/train/tokens_used": 282602976, "objective/train/value_avg": -0.020355224609375, "objective/train/value_loss": 0.004943587351590395, "objective/train/value_max": -0.0005273818969726562, "objective/train/value_min": -0.9267578125, "objective/train/value_reward_corr": 0.48562661331286816, "objective/train/value_std": 0.029876708984375, "objective/train/weight_avg": 1.0010913610458374, "objective/train/weighted_lm_loss": 3.0308477878570557, "objective/train/weights_max": 1.047502875328064, "objective/train/weights_min": 0.9080513715744019, "theoretical_loss": 3.441571473510038, "tokens_seen": 1923743744 }, { "epoch": 0.16, "learning_rate": 0.0008486905916585838, "loss": 1.3945, "theoretical_loss": 3.441571473510038, "tokens_seen": 1923743744 }, { "epoch": 0.16, "learning_rate": 0.0008483672809569997, "loss": 1.3946, "theoretical_loss": 3.4414926839876463, "tokens_seen": 1924268032 }, { "epoch": 0.16, "learning_rate": 0.0008480439702554154, "loss": 1.3891, "theoretical_loss": 3.4414139219383166, "tokens_seen": 1924792320 }, { "epoch": 0.16, "learning_rate": 0.0008477206595538313, "loss": 1.3882, "theoretical_loss": 3.4413351873449907, "tokens_seen": 1925316608 }, { "epoch": 0.16, "objective/train/advantage_avg": 0.004374386742711067, "objective/train/docs_used": 1088918, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.785994291305542, "objective/train/original_loss": 2.785993814468384, "objective/train/theoretical_loss": 3.4413253474504764, "objective/train/tokens_used": 284241376, "objective/train/value_avg": -0.024566650390625, "objective/train/value_loss": 0.010232404805719852, "objective/train/value_max": -0.001483917236328125, "objective/train/value_min": -0.9208984375, "objective/train/value_reward_corr": 0.2378379665755346, "objective/train/value_std": 0.0297698974609375, "objective/train/weight_avg": 1.0004873275756836, "objective/train/weighted_lm_loss": 2.788538932800293, "objective/train/weights_max": 1.0646355152130127, "objective/train/weights_min": 0.9082236289978027, "theoretical_loss": 3.4413253474504764, "tokens_seen": 1925382144 }, { "epoch": 0.16, "learning_rate": 0.0008473973488522471, "loss": 1.3657, "theoretical_loss": 3.4412564801906242, "tokens_seen": 1925840896 }, { "epoch": 0.16, "learning_rate": 0.0008470740381506627, "loss": 1.3534, "theoretical_loss": 3.4411778004581888, "tokens_seen": 1926365184 }, { "epoch": 0.16, "learning_rate": 0.0008467507274490786, "loss": 1.3804, "theoretical_loss": 3.441099148130671, "tokens_seen": 1926889472 }, { "epoch": 0.16, "objective/train/advantage_avg": 0.006754822097718716, "objective/train/docs_used": 1089550, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.555563449859619, "objective/train/original_loss": 2.555563449859619, "objective/train/theoretical_loss": 3.4410794893288177, "objective/train/tokens_used": 285879776, "objective/train/value_avg": -0.022674560546875, "objective/train/value_loss": 0.00521583016961813, "objective/train/value_max": -0.0005593299865722656, "objective/train/value_min": -0.2783203125, "objective/train/value_reward_corr": 0.4699308390986681, "objective/train/value_std": 0.0266876220703125, "objective/train/weight_avg": 1.0007010698318481, "objective/train/weighted_lm_loss": 2.557454824447632, "objective/train/weights_max": 1.0233469009399414, "objective/train/weights_min": 0.9280031323432922, "theoretical_loss": 3.4410794893288177, "tokens_seen": 1927020544 }, { "epoch": 0.16, "learning_rate": 0.0008464274167474943, "loss": 1.3873, "theoretical_loss": 3.4410205231910735, "tokens_seen": 1927413760 }, { "epoch": 0.16, "learning_rate": 0.0008461041060459102, "loss": 1.3174, "theoretical_loss": 3.440941925622412, "tokens_seen": 1927938048 }, { "epoch": 0.16, "learning_rate": 0.000845780795344326, "loss": 1.3302, "theoretical_loss": 3.4408633554077204, "tokens_seen": 1928462336 }, { "epoch": 0.16, "objective/train/advantage_avg": 0.0009950719540938735, "objective/train/docs_used": 1091004, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.6325483322143555, "objective/train/original_loss": 2.6325480937957764, "objective/train/theoretical_loss": 3.4408338986259457, "objective/train/tokens_used": 287518176, "objective/train/value_avg": -0.0198516845703125, "objective/train/value_loss": 0.003103528870269656, "objective/train/value_max": -0.00047659873962402344, "objective/train/value_min": -0.5087890625, "objective/train/value_reward_corr": 0.31509728429152273, "objective/train/value_std": 0.016815185546875, "objective/train/weight_avg": 1.000114917755127, "objective/train/weighted_lm_loss": 2.634557008743286, "objective/train/weights_max": 1.0302361249923706, "objective/train/weights_min": 0.9275555610656738, "theoretical_loss": 3.4408338986259457, "tokens_seen": 1928658944 }, { "epoch": 0.16, "learning_rate": 0.0008454574846427417, "loss": 1.3835, "theoretical_loss": 3.4407848125300444, "tokens_seen": 1928986624 }, { "epoch": 0.16, "learning_rate": 0.0008451341739411575, "loss": 1.3763, "theoretical_loss": 3.440706296972447, "tokens_seen": 1929510912 }, { "epoch": 0.16, "learning_rate": 0.0008448108632395732, "loss": 1.3629, "theoretical_loss": 3.4406278087180064, "tokens_seen": 1930035200 }, { "epoch": 0.16, "objective/train/advantage_avg": 0.012400079518556595, "objective/train/docs_used": 1091522, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.945035219192505, "objective/train/original_loss": 2.9450342655181885, "objective/train/theoretical_loss": 3.4405885748241856, "objective/train/tokens_used": 289156576, "objective/train/value_avg": -0.0188140869140625, "objective/train/value_loss": 0.0021867267787456512, "objective/train/value_max": -0.0005316734313964844, "objective/train/value_min": -0.86083984375, "objective/train/value_reward_corr": 0.363471756296725, "objective/train/value_std": 0.02056884765625, "objective/train/weight_avg": 1.0012507438659668, "objective/train/weighted_lm_loss": 2.9492177963256836, "objective/train/weights_max": 1.048720121383667, "objective/train/weights_min": 0.9084455966949463, "theoretical_loss": 3.4405885748241856, "tokens_seen": 1930297344 }, { "epoch": 0.16, "learning_rate": 0.000844487552537989, "loss": 1.355, "theoretical_loss": 3.4405493477498146, "tokens_seen": 1930559488 }, { "epoch": 0.16, "learning_rate": 0.0008441642418364049, "loss": 1.3455, "theoretical_loss": 3.4404709140509793, "tokens_seen": 1931083776 }, { "epoch": 0.16, "learning_rate": 0.0008438409311348206, "loss": 1.3618, "theoretical_loss": 3.4403925076046225, "tokens_seen": 1931608064 }, { "epoch": 0.16, "objective/train/advantage_avg": 0.0142822265625, "objective/train/docs_used": 1092471, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.876871109008789, "objective/train/original_loss": 2.876870632171631, "objective/train/theoretical_loss": 3.4403435174073063, "objective/train/tokens_used": 290794976, "objective/train/value_avg": -0.033660888671875, "objective/train/value_loss": 0.006888589821755886, "objective/train/value_max": -0.0010404586791992188, "objective/train/value_min": -0.96875, "objective/train/value_reward_corr": 0.5878421234296678, "objective/train/value_std": 0.0693359375, "objective/train/weight_avg": 1.0014622211456299, "objective/train/weighted_lm_loss": 2.88213849067688, "objective/train/weights_max": 1.0606741905212402, "objective/train/weights_min": 0.9154029488563538, "theoretical_loss": 3.4403435174073063, "tokens_seen": 1931935744 }, { "epoch": 0.17, "learning_rate": 0.0008435176204332364, "loss": 1.3755, "theoretical_loss": 3.440314128393883, "tokens_seen": 1932132352 }, { "epoch": 0.17, "learning_rate": 0.0008431943097316521, "loss": 1.4026, "theoretical_loss": 3.4402357764019134, "tokens_seen": 1932656640 }, { "epoch": 0.17, "learning_rate": 0.0008428709990300679, "loss": 1.331, "theoretical_loss": 3.4401574516118805, "tokens_seen": 1933180928 }, { "epoch": 0.17, "objective/train/advantage_avg": 0.004070428665727377, "objective/train/docs_used": 1092955, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.7035350799560547, "objective/train/original_loss": 2.7035346031188965, "objective/train/theoretical_loss": 3.4400987258605102, "objective/train/tokens_used": 292433376, "objective/train/value_avg": -0.053436279296875, "objective/train/value_loss": 0.012673403136432171, "objective/train/value_max": -0.0005974769592285156, "objective/train/value_min": -0.99609375, "objective/train/value_reward_corr": 0.7814381251524036, "objective/train/value_std": 0.148193359375, "objective/train/weight_avg": 1.0004702806472778, "objective/train/weighted_lm_loss": 2.7062551975250244, "objective/train/weights_max": 1.0983930826187134, "objective/train/weights_min": 0.9085613489151001, "theoretical_loss": 3.4400987258605102, "tokens_seen": 1933574144 }, { "epoch": 0.17, "learning_rate": 0.0008425476883284838, "loss": 1.3884, "theoretical_loss": 3.440079154006968, "tokens_seen": 1933705216 }, { "epoch": 0.17, "learning_rate": 0.0008422243776268995, "loss": 1.3579, "theoretical_loss": 3.4400008835703733, "tokens_seen": 1934229504 }, { "epoch": 0.17, "learning_rate": 0.0008419010669253152, "loss": 1.3674, "theoretical_loss": 3.4399226402853085, "tokens_seen": 1934753792 }, { "epoch": 0.17, "objective/train/advantage_avg": 0.0021823463030159473, "objective/train/docs_used": 1093966, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.6260323524475098, "objective/train/original_loss": 2.626032590866089, "objective/train/theoretical_loss": 3.439854199670431, "objective/train/tokens_used": 294071776, "objective/train/value_avg": -0.0182342529296875, "objective/train/value_loss": 0.005270556081086397, "objective/train/value_max": -0.0008559226989746094, "objective/train/value_min": -0.97412109375, "objective/train/value_reward_corr": 0.4790021392051187, "objective/train/value_std": 0.02716064453125, "objective/train/weight_avg": 1.0002440214157104, "objective/train/weighted_lm_loss": 2.626844882965088, "objective/train/weights_max": 1.0186526775360107, "objective/train/weights_min": 0.9083866477012634, "theoretical_loss": 3.439854199670431, "tokens_seen": 1935212544 }, { "epoch": 0.17, "learning_rate": 0.000841577756223731, "loss": 1.3943, "theoretical_loss": 3.4398444241350017, "tokens_seen": 1935278080 }, { "epoch": 0.17, "learning_rate": 0.0008412544455221468, "loss": 1.3983, "theoretical_loss": 3.439766235102695, "tokens_seen": 1935802368 }, { "epoch": 0.17, "learning_rate": 0.0008409311348205626, "loss": 1.3954, "theoretical_loss": 3.4396880731716455, "tokens_seen": 1936326656 }, { "epoch": 0.17, "objective/train/advantage_avg": 0.009049537591636181, "objective/train/docs_used": 1094637, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.991359233856201, "objective/train/original_loss": 2.991359233856201, "objective/train/theoretical_loss": 3.439609938325126, "objective/train/tokens_used": 295710176, "objective/train/value_avg": -0.0242767333984375, "objective/train/value_loss": 0.0027245194651186466, "objective/train/value_max": -0.0006413459777832031, "objective/train/value_min": -0.7236328125, "objective/train/value_reward_corr": 0.38298188951456996, "objective/train/value_std": 0.0253448486328125, "objective/train/weight_avg": 1.0009185075759888, "objective/train/weighted_lm_loss": 2.9950647354125977, "objective/train/weights_max": 1.05003821849823, "objective/train/weights_min": 0.9233407974243164, "theoretical_loss": 3.439609938325126, "tokens_seen": 1936850944 }, { "epoch": 0.17, "learning_rate": 0.0008406078241189784, "loss": 1.4043, "theoretical_loss": 3.439609938325126, "tokens_seen": 1936850944 }, { "epoch": 0.17, "learning_rate": 0.0008402845134173941, "loss": 1.3765, "theoretical_loss": 3.4395318305464224, "tokens_seen": 1937375232 }, { "epoch": 0.17, "learning_rate": 0.0008399612027158099, "loss": 1.4016, "theoretical_loss": 3.439453749818837, "tokens_seen": 1937899520 }, { "epoch": 0.17, "learning_rate": 0.0008396378920142257, "loss": 1.3754, "theoretical_loss": 3.439375696125687, "tokens_seen": 1938423808 }, { "epoch": 0.17, "objective/train/advantage_avg": 0.012353584170341492, "objective/train/docs_used": 1095318, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.6138620376586914, "objective/train/original_loss": 2.6138622760772705, "objective/train/theoretical_loss": 3.4393659413140716, "objective/train/tokens_used": 297348576, "objective/train/value_avg": -0.020660400390625, "objective/train/value_loss": 0.0019467566162347794, "objective/train/value_max": -0.0012645721435546875, "objective/train/value_min": -0.420166015625, "objective/train/value_reward_corr": 0.2817962708080211, "objective/train/value_std": 0.0223388671875, "objective/train/weight_avg": 1.0012450218200684, "objective/train/weighted_lm_loss": 2.6177921295166016, "objective/train/weights_max": 1.0399041175842285, "objective/train/weights_min": 0.9288323521614075, "theoretical_loss": 3.4393659413140716, "tokens_seen": 1938489344 }, { "epoch": 0.17, "learning_rate": 0.0008393145813126414, "loss": 1.3798, "theoretical_loss": 3.439297669450303, "tokens_seen": 1938948096 }, { "epoch": 0.17, "learning_rate": 0.0008389912706110573, "loss": 1.3935, "theoretical_loss": 3.4392196697760324, "tokens_seen": 1939472384 }, { "epoch": 0.17, "learning_rate": 0.000838667959909473, "loss": 1.3633, "theoretical_loss": 3.4391416970862347, "tokens_seen": 1939996672 }, { "epoch": 0.17, "objective/train/advantage_avg": 0.00014355106395669281, "objective/train/docs_used": 1096386, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 3.179112434387207, "objective/train/original_loss": 3.179112672805786, "objective/train/theoretical_loss": 3.439122208128161, "objective/train/tokens_used": 298986976, "objective/train/value_avg": -0.02996826171875, "objective/train/value_loss": 0.011021994985640049, "objective/train/value_max": -0.001094818115234375, "objective/train/value_min": -0.970703125, "objective/train/value_reward_corr": 0.6241270435260775, "objective/train/value_std": 0.0657958984375, "objective/train/weight_avg": 1.0000684261322021, "objective/train/weighted_lm_loss": 3.1792702674865723, "objective/train/weights_max": 1.048297643661499, "objective/train/weights_min": 0.9073925614356995, "theoretical_loss": 3.439122208128161, "tokens_seen": 1940127744 }, { "epoch": 0.17, "learning_rate": 0.0008383446492078888, "loss": 1.3991, "theoretical_loss": 3.4390637513642868, "tokens_seen": 1940520960 }, { "epoch": 0.17, "learning_rate": 0.0008380213385063046, "loss": 1.4213, "theoretical_loss": 3.4389858325935783, "tokens_seen": 1941045248 }, { "epoch": 0.17, "learning_rate": 0.0008376980278047203, "loss": 1.3921, "theoretical_loss": 3.438907940757515, "tokens_seen": 1941569536 }, { "epoch": 0.17, "objective/train/advantage_avg": 0.00437566451728344, "objective/train/docs_used": 1097742, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.44340181350708, "objective/train/original_loss": 2.44340181350708, "objective/train/theoretical_loss": 3.438878738259695, "objective/train/tokens_used": 300625376, "objective/train/value_avg": -0.02191162109375, "objective/train/value_loss": 0.0017287518130615354, "objective/train/value_max": -0.0006513595581054688, "objective/train/value_min": -0.49169921875, "objective/train/value_reward_corr": 0.5658737532236628, "objective/train/value_std": 0.0269012451171875, "objective/train/weight_avg": 1.0004462003707886, "objective/train/weighted_lm_loss": 2.445186138153076, "objective/train/weights_max": 1.0249384641647339, "objective/train/weights_min": 0.9484029412269592, "theoretical_loss": 3.438878738259695, "tokens_seen": 1941766144 }, { "epoch": 0.17, "learning_rate": 0.0008373747171031362, "loss": 1.383, "theoretical_loss": 3.4388300758395163, "tokens_seen": 1942093824 }, { "epoch": 0.17, "learning_rate": 0.0008370514064015518, "loss": 1.3601, "theoretical_loss": 3.4387522378230173, "tokens_seen": 1942618112 }, { "epoch": 0.17, "learning_rate": 0.0008367280956999677, "loss": 1.3692, "theoretical_loss": 3.438674426691467, "tokens_seen": 1943142400 }, { "epoch": 0.17, "objective/train/advantage_avg": 0.0077302828431129456, "objective/train/docs_used": 1098461, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.5884969234466553, "objective/train/original_loss": 2.588496685028076, "objective/train/theoretical_loss": 3.438635531202379, "objective/train/tokens_used": 302263776, "objective/train/value_avg": -0.0281219482421875, "objective/train/value_loss": 0.007392663508653641, "objective/train/value_max": -0.0011882781982421875, "objective/train/value_min": -0.9501953125, "objective/train/value_reward_corr": 0.5743906399654213, "objective/train/value_std": 0.055084228515625, "objective/train/weight_avg": 1.0008093118667603, "objective/train/weighted_lm_loss": 2.5915729999542236, "objective/train/weights_max": 1.077174425125122, "objective/train/weights_min": 0.9150641560554504, "theoretical_loss": 3.438635531202379, "tokens_seen": 1943404544 }, { "epoch": 0.17, "learning_rate": 0.0008364047849983835, "loss": 1.3782, "theoretical_loss": 3.4385966424283287, "tokens_seen": 1943666688 }, { "epoch": 0.17, "learning_rate": 0.0008360814742967992, "loss": 1.41, "theoretical_loss": 3.438518885017081, "tokens_seen": 1944190976 }, { "epoch": 0.17, "learning_rate": 0.0008357581635952151, "loss": 1.3967, "theoretical_loss": 3.438441154441218, "tokens_seen": 1944715264 }, { "epoch": 0.17, "objective/train/advantage_avg": -0.006080897990614176, "objective/train/docs_used": 1099474, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.7229511737823486, "objective/train/original_loss": 2.7229511737823486, "objective/train/theoretical_loss": 3.438392586451318, "objective/train/tokens_used": 303902176, "objective/train/value_avg": -0.023284912109375, "objective/train/value_loss": 0.014011848717927933, "objective/train/value_max": -0.0013408660888671875, "objective/train/value_min": -0.94970703125, "objective/train/value_reward_corr": 0.17255341302513913, "objective/train/value_std": 0.03173828125, "objective/train/weight_avg": 0.9994603991508484, "objective/train/weighted_lm_loss": 2.7258126735687256, "objective/train/weights_max": 1.0806688070297241, "objective/train/weights_min": 0.9104644060134888, "theoretical_loss": 3.438392586451318, "tokens_seen": 1945042944 }, { "epoch": 0.17, "learning_rate": 0.0008354348528936307, "loss": 1.401, "theoretical_loss": 3.4383634506842466, "tokens_seen": 1945239552 }, { "epoch": 0.17, "learning_rate": 0.0008351115421920465, "loss": 1.374, "theoretical_loss": 3.438285773729689, "tokens_seen": 1945763840 }, { "epoch": 0.17, "learning_rate": 0.0008347882314904624, "loss": 1.3549, "theoretical_loss": 3.4382081235610826, "tokens_seen": 1946288128 }, { "epoch": 0.17, "objective/train/advantage_avg": -0.003251043614000082, "objective/train/docs_used": 1099984, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.5572500228881836, "objective/train/original_loss": 2.5572500228881836, "objective/train/theoretical_loss": 3.438149903503012, "objective/train/tokens_used": 305540576, "objective/train/value_avg": -0.045745849609375, "objective/train/value_loss": 0.014988024719059467, "objective/train/value_max": -0.0007762908935546875, "objective/train/value_min": -0.96435546875, "objective/train/value_reward_corr": 0.6224286873991105, "objective/train/value_std": 0.08172607421875, "objective/train/weight_avg": 0.999748706817627, "objective/train/weighted_lm_loss": 2.5556888580322266, "objective/train/weights_max": 1.0705066919326782, "objective/train/weights_min": 0.9070842862129211, "theoretical_loss": 3.438149903503012, "tokens_seen": 1946681344 }, { "epoch": 0.17, "learning_rate": 0.0008344649207888781, "loss": 1.4221, "theoretical_loss": 3.4381305001619777, "tokens_seen": 1946812416 }, { "epoch": 0.17, "learning_rate": 0.000834141610087294, "loss": 1.414, "theoretical_loss": 3.438052903515941, "tokens_seen": 1947336704 }, { "epoch": 0.17, "learning_rate": 0.0008338182993857096, "loss": 1.3659, "theoretical_loss": 3.437975333606553, "tokens_seen": 1947860992 }, { "epoch": 0.17, "objective/train/advantage_avg": 0.007472518365830183, "objective/train/docs_used": 1101410, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.618328809738159, "objective/train/original_loss": 2.61832857131958, "objective/train/theoretical_loss": 3.4379074818553486, "objective/train/tokens_used": 307178976, "objective/train/value_avg": -0.01904296875, "objective/train/value_loss": 0.002574007725343108, "objective/train/value_max": -0.0008492469787597656, "objective/train/value_min": -0.47705078125, "objective/train/value_reward_corr": 0.32055813200084865, "objective/train/value_std": 0.0190277099609375, "objective/train/weight_avg": 1.0007599592208862, "objective/train/weighted_lm_loss": 2.6207938194274902, "objective/train/weights_max": 1.022871971130371, "objective/train/weights_min": 0.916513204574585, "theoretical_loss": 3.4379074818553486, "tokens_seen": 1948319744 }, { "epoch": 0.18, "learning_rate": 0.0008334949886841254, "loss": 1.3531, "theoretical_loss": 3.4378977904174084, "tokens_seen": 1948385280 }, { "epoch": 0.18, "learning_rate": 0.0008331716779825413, "loss": 1.3553, "theoretical_loss": 3.437820273932116, "tokens_seen": 1948909568 }, { "epoch": 0.18, "learning_rate": 0.000832848367280957, "loss": 1.3683, "theoretical_loss": 3.4377427841343007, "tokens_seen": 1949433856 }, { "epoch": 0.18, "objective/train/advantage_avg": 0.006936562247574329, "objective/train/docs_used": 1102181, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 3.0073609352111816, "objective/train/original_loss": 3.0073606967926025, "objective/train/theoretical_loss": 3.4376653210075996, "objective/train/tokens_used": 308817376, "objective/train/value_avg": -0.021270751953125, "objective/train/value_loss": 0.003706657560542226, "objective/train/value_max": -0.000865936279296875, "objective/train/value_min": -0.94287109375, "objective/train/value_reward_corr": 0.40073125741024723, "objective/train/value_std": 0.0247039794921875, "objective/train/weight_avg": 1.0007117986679077, "objective/train/weighted_lm_loss": 3.011354446411133, "objective/train/weights_max": 1.034043312072754, "objective/train/weights_min": 0.9084322452545166, "theoretical_loss": 3.4376653210075996, "tokens_seen": 1949958144 }, { "epoch": 0.18, "learning_rate": 0.0008325250565793728, "loss": 1.3725, "theoretical_loss": 3.4376653210075996, "tokens_seen": 1949958144 }, { "epoch": 0.18, "learning_rate": 0.0008322017458777885, "loss": 1.3553, "theoretical_loss": 3.437587884535666, "tokens_seen": 1950482432 }, { "epoch": 0.18, "learning_rate": 0.0008318784351762043, "loss": 1.3542, "theoretical_loss": 3.437510474702168, "tokens_seen": 1951006720 }, { "epoch": 0.18, "learning_rate": 0.0008315551244746202, "loss": 1.3626, "theoretical_loss": 3.437433091490785, "tokens_seen": 1951531008 }, { "epoch": 0.18, "objective/train/advantage_avg": 0.008726296946406364, "objective/train/docs_used": 1103575, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.4771275520324707, "objective/train/original_loss": 2.4771270751953125, "objective/train/theoretical_loss": 3.4374234204604175, "objective/train/tokens_used": 310455776, "objective/train/value_avg": -0.0238037109375, "objective/train/value_loss": 0.0048393975012004375, "objective/train/value_max": -0.0009889602661132812, "objective/train/value_min": -0.63671875, "objective/train/value_reward_corr": 0.3311347622489436, "objective/train/value_std": 0.034912109375, "objective/train/weight_avg": 1.0008964538574219, "objective/train/weighted_lm_loss": 2.479717969894409, "objective/train/weights_max": 1.0554628372192383, "objective/train/weights_min": 0.9108964800834656, "theoretical_loss": 3.4374234204604175, "tokens_seen": 1951596544 }, { "epoch": 0.18, "learning_rate": 0.0008312318137730359, "loss": 1.3535, "theoretical_loss": 3.437355734885215, "tokens_seen": 1952055296 }, { "epoch": 0.18, "learning_rate": 0.0008309085030714517, "loss": 1.2972, "theoretical_loss": 3.4372784048691667, "tokens_seen": 1952579584 }, { "epoch": 0.18, "learning_rate": 0.0008305851923698675, "loss": 1.3598, "theoretical_loss": 3.437201101426365, "tokens_seen": 1953103872 }, { "epoch": 0.18, "objective/train/advantage_avg": 0.0025030511897057295, "objective/train/docs_used": 1104173, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.327694892883301, "objective/train/original_loss": 2.327694892883301, "objective/train/theoretical_loss": 3.4371817797158286, "objective/train/tokens_used": 312094176, "objective/train/value_avg": -0.0178680419921875, "objective/train/value_loss": 0.003121996531262994, "objective/train/value_max": -0.0010652542114257812, "objective/train/value_min": -0.394775390625, "objective/train/value_reward_corr": 0.41445420015887774, "objective/train/value_std": 0.0182647705078125, "objective/train/weight_avg": 1.0002657175064087, "objective/train/weighted_lm_loss": 2.3289239406585693, "objective/train/weights_max": 1.0297117233276367, "objective/train/weights_min": 0.9229324460029602, "theoretical_loss": 3.4371817797158286, "tokens_seen": 1953234944 }, { "epoch": 0.18, "learning_rate": 0.0008302618816682832, "loss": 1.3556, "theoretical_loss": 3.4371238245405498, "tokens_seen": 1953628160 }, { "epoch": 0.18, "learning_rate": 0.000829938570966699, "loss": 1.3782, "theoretical_loss": 3.437046574195473, "tokens_seen": 1954152448 }, { "epoch": 0.18, "learning_rate": 0.0008296152602651148, "loss": 1.3741, "theoretical_loss": 3.4369693503749033, "tokens_seen": 1954676736 }, { "epoch": 0.18, "objective/train/advantage_avg": 0.01156978402286768, "objective/train/docs_used": 1105519, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.922393321990967, "objective/train/original_loss": 2.922393321990967, "objective/train/theoretical_loss": 3.436940398277228, "objective/train/tokens_used": 313732576, "objective/train/value_avg": -0.023406982421875, "objective/train/value_loss": 0.0017024375265464187, "objective/train/value_max": -0.0009622573852539062, "objective/train/value_min": -0.38623046875, "objective/train/value_reward_corr": 0.29988232213142585, "objective/train/value_std": 0.0212860107421875, "objective/train/weight_avg": 1.001165509223938, "objective/train/weighted_lm_loss": 2.926607131958008, "objective/train/weights_max": 1.0320336818695068, "objective/train/weights_min": 0.9495770931243896, "theoretical_loss": 3.436940398277228, "tokens_seen": 1954873344 }, { "epoch": 0.18, "learning_rate": 0.0008292919495635306, "loss": 1.3603, "theoretical_loss": 3.4368921530626215, "tokens_seen": 1955201024 }, { "epoch": 0.18, "learning_rate": 0.0008289686388619464, "loss": 1.3925, "theoretical_loss": 3.4368149822424243, "tokens_seen": 1955725312 }, { "epoch": 0.18, "learning_rate": 0.0008286453281603621, "loss": 1.3546, "theoretical_loss": 3.436737837898122, "tokens_seen": 1956249600 }, { "epoch": 0.18, "objective/train/advantage_avg": 0.011796032078564167, "objective/train/docs_used": 1106102, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.770855665206909, "objective/train/original_loss": 2.77085542678833, "objective/train/theoretical_loss": 3.436699275649376, "objective/train/tokens_used": 315370976, "objective/train/value_avg": -0.0173797607421875, "objective/train/value_loss": 0.0006557661690749228, "objective/train/value_max": -0.0011377334594726562, "objective/train/value_min": -0.2476806640625, "objective/train/value_reward_corr": 0.22047559179669457, "objective/train/value_std": 0.0136871337890625, "objective/train/weight_avg": 1.0011829137802124, "objective/train/weighted_lm_loss": 2.7752506732940674, "objective/train/weights_max": 1.0213621854782104, "objective/train/weights_min": 0.9786631464958191, "theoretical_loss": 3.436699275649376, "tokens_seen": 1956511744 }, { "epoch": 0.18, "learning_rate": 0.0008283220174587778, "loss": 1.3671, "theoretical_loss": 3.4366607200135384, "tokens_seen": 1956773888 }, { "epoch": 0.18, "learning_rate": 0.0008279987067571937, "loss": 1.3516, "theoretical_loss": 3.4365836285725138, "tokens_seen": 1957298176 }, { "epoch": 0.18, "learning_rate": 0.0008276753960556095, "loss": 1.3389, "theoretical_loss": 3.436506563558899, "tokens_seen": 1957822464 }, { "epoch": 0.18, "objective/train/advantage_avg": 0.009821400046348572, "objective/train/docs_used": 1106840, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.4988391399383545, "objective/train/original_loss": 2.4988393783569336, "objective/train/theoretical_loss": 3.436458411338391, "objective/train/tokens_used": 317009376, "objective/train/value_avg": -0.0181427001953125, "objective/train/value_loss": 0.0013699524570256472, "objective/train/value_max": -0.0007791519165039062, "objective/train/value_min": -0.4072265625, "objective/train/value_reward_corr": 0.2331040886086288, "objective/train/value_std": 0.01526641845703125, "objective/train/weight_avg": 1.0009889602661133, "objective/train/weighted_lm_loss": 2.5024542808532715, "objective/train/weights_max": 1.0165849924087524, "objective/train/weights_min": 0.9154652953147888, "theoretical_loss": 3.436458411338391, "tokens_seen": 1958150144 }, { "epoch": 0.18, "learning_rate": 0.0008273520853540253, "loss": 1.3515, "theoretical_loss": 3.436429524956563, "tokens_seen": 1958346752 }, { "epoch": 0.18, "learning_rate": 0.000827028774652441, "loss": 1.3436, "theoretical_loss": 3.436352512749386, "tokens_seen": 1958871040 }, { "epoch": 0.18, "learning_rate": 0.0008267054639508567, "loss": 1.3301, "theoretical_loss": 3.436275526921264, "tokens_seen": 1959395328 }, { "epoch": 0.18, "objective/train/advantage_avg": 0.010332440957427025, "objective/train/docs_used": 1108280, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.7219398021698, "objective/train/original_loss": 2.7219398021698, "objective/train/theoretical_loss": 3.436217804851747, "objective/train/tokens_used": 318647776, "objective/train/value_avg": -0.0230865478515625, "objective/train/value_loss": 0.004800387658178806, "objective/train/value_max": -0.0009584426879882812, "objective/train/value_min": -0.984375, "objective/train/value_reward_corr": 0.501228332614603, "objective/train/value_std": 0.054534912109375, "objective/train/weight_avg": 1.0010570287704468, "objective/train/weighted_lm_loss": 2.7256808280944824, "objective/train/weights_max": 1.1000525951385498, "objective/train/weights_min": 0.9084038138389587, "theoretical_loss": 3.436217804851747, "tokens_seen": 1959788544 }, { "epoch": 0.18, "learning_rate": 0.0008263821532492726, "loss": 1.3232, "theoretical_loss": 3.436198567456106, "tokens_seen": 1959919616 }, { "epoch": 0.18, "learning_rate": 0.0008260588425476884, "loss": 1.3297, "theoretical_loss": 3.4361216343378365, "tokens_seen": 1960443904 }, { "epoch": 0.18, "learning_rate": 0.0008257355318461041, "loss": 1.3296, "theoretical_loss": 3.436044727550393, "tokens_seen": 1960968192 }, { "epoch": 0.18, "objective/train/advantage_avg": 0.005664799362421036, "objective/train/docs_used": 1108943, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.671004295349121, "objective/train/original_loss": 2.671003818511963, "objective/train/theoretical_loss": 3.435977455698269, "objective/train/tokens_used": 320286176, "objective/train/value_avg": -0.03472900390625, "objective/train/value_loss": 0.005881099961698055, "objective/train/value_max": -0.0006513595581054688, "objective/train/value_min": -0.9833984375, "objective/train/value_reward_corr": 0.8681358150313213, "objective/train/value_std": 0.101318359375, "objective/train/weight_avg": 1.0005953311920166, "objective/train/weighted_lm_loss": 2.6735355854034424, "objective/train/weights_max": 1.077481746673584, "objective/train/weights_min": 0.9057987928390503, "theoretical_loss": 3.435977455698269, "tokens_seen": 1961426944 }, { "epoch": 0.18, "learning_rate": 0.0008254122211445199, "loss": 1.3596, "theoretical_loss": 3.4359678470777273, "tokens_seen": 1961492480 }, { "epoch": 0.18, "learning_rate": 0.0008250889104429356, "loss": 1.3102, "theoretical_loss": 3.435890992903805, "tokens_seen": 1962016768 }, { "epoch": 0.18, "learning_rate": 0.0008247655997413515, "loss": 1.3809, "theoretical_loss": 3.435814165012606, "tokens_seen": 1962541056 }, { "epoch": 0.18, "objective/train/advantage_avg": 0.0077169546857476234, "objective/train/docs_used": 1110285, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.4553825855255127, "objective/train/original_loss": 2.4553821086883545, "objective/train/theoretical_loss": 3.4357373633881254, "objective/train/tokens_used": 321924576, "objective/train/value_avg": -0.0196990966796875, "objective/train/value_loss": 0.0023165643215179443, "objective/train/value_max": -0.000614166259765625, "objective/train/value_min": -0.798828125, "objective/train/value_reward_corr": 0.40731284566154063, "objective/train/value_std": 0.022735595703125, "objective/train/weight_avg": 1.000783085823059, "objective/train/weighted_lm_loss": 2.4578020572662354, "objective/train/weights_max": 1.0445470809936523, "objective/train/weights_min": 0.9092017412185669, "theoretical_loss": 3.4357373633881254, "tokens_seen": 1963065344 }, { "epoch": 0.18, "learning_rate": 0.0008244422890397673, "loss": 1.305, "theoretical_loss": 3.4357373633881254, "tokens_seen": 1963065344 }, { "epoch": 0.18, "learning_rate": 0.000824118978338183, "loss": 1.3387, "theoretical_loss": 3.43566058801437, "tokens_seen": 1963589632 }, { "epoch": 0.18, "learning_rate": 0.0008237956676365988, "loss": 1.3292, "theoretical_loss": 3.4355838388753623, "tokens_seen": 1964113920 }, { "epoch": 0.18, "learning_rate": 0.0008234723569350145, "loss": 1.3151, "theoretical_loss": 3.435507115955139, "tokens_seen": 1964638208 }, { "epoch": 0.18, "objective/train/advantage_avg": 0.01614287868142128, "objective/train/docs_used": 1110737, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 3.44508695602417, "objective/train/original_loss": 3.445086717605591, "objective/train/theoretical_loss": 3.4354975274328248, "objective/train/tokens_used": 323562976, "objective/train/value_avg": -0.0254669189453125, "objective/train/value_loss": 0.0028345673345029354, "objective/train/value_max": -0.0009217262268066406, "objective/train/value_min": -0.697265625, "objective/train/value_reward_corr": 0.25041427633486196, "objective/train/value_std": 0.0214691162109375, "objective/train/weight_avg": 1.0016282796859741, "objective/train/weighted_lm_loss": 3.451572895050049, "objective/train/weights_max": 1.034922480583191, "objective/train/weights_min": 0.9309747219085693, "theoretical_loss": 3.4354975274328248, "tokens_seen": 1964703744 }, { "epoch": 0.19, "learning_rate": 0.0008231490462334303, "loss": 1.2849, "theoretical_loss": 3.435430419237749, "tokens_seen": 1965162496 }, { "epoch": 0.19, "learning_rate": 0.0008228257355318462, "loss": 1.2932, "theoretical_loss": 3.435353748707257, "tokens_seen": 1965686784 }, { "epoch": 0.19, "learning_rate": 0.0008225024248302619, "loss": 1.317, "theoretical_loss": 3.4352771043477413, "tokens_seen": 1966211072 }, { "epoch": 0.19, "objective/train/advantage_avg": -0.005044080782681704, "objective/train/docs_used": 1112038, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 3.1969141960144043, "objective/train/original_loss": 3.196913719177246, "objective/train/theoretical_loss": 3.435257947345212, "objective/train/tokens_used": 325201376, "objective/train/value_avg": -0.03515625, "objective/train/value_loss": 0.01597760058939457, "objective/train/value_max": -0.0012302398681640625, "objective/train/value_min": -0.95556640625, "objective/train/value_reward_corr": 0.530430020533668, "objective/train/value_std": 0.059600830078125, "objective/train/weight_avg": 0.9995738863945007, "objective/train/weighted_lm_loss": 3.194790840148926, "objective/train/weights_max": 1.0480602979660034, "objective/train/weights_min": 0.9106881618499756, "theoretical_loss": 3.435257947345212, "tokens_seen": 1966342144 }, { "epoch": 0.19, "learning_rate": 0.0008221791141286777, "loss": 1.3671, "theoretical_loss": 3.435200486143292, "tokens_seen": 1966735360 }, { "epoch": 0.19, "learning_rate": 0.0008218558034270934, "loss": 1.325, "theoretical_loss": 3.435123894078017, "tokens_seen": 1967259648 }, { "epoch": 0.19, "learning_rate": 0.0008215324927255092, "loss": 1.3115, "theoretical_loss": 3.435047328136034, "tokens_seen": 1967783936 }, { "epoch": 0.19, "objective/train/advantage_avg": 0.009604474529623985, "objective/train/docs_used": 1112624, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.315398931503296, "objective/train/original_loss": 2.315398931503296, "objective/train/theoretical_loss": 3.435018622639463, "objective/train/tokens_used": 326839776, "objective/train/value_avg": -0.0169219970703125, "objective/train/value_loss": 0.000628960900940001, "objective/train/value_max": -0.0008296966552734375, "objective/train/value_min": -0.38525390625, "objective/train/value_reward_corr": 0.31810167344257323, "objective/train/value_std": 0.018829345703125, "objective/train/weight_avg": 1.000963568687439, "objective/train/weighted_lm_loss": 2.3184916973114014, "objective/train/weights_max": 1.0383695363998413, "objective/train/weights_min": 0.9404564499855042, "theoretical_loss": 3.435018622639463, "tokens_seen": 1967980544 }, { "epoch": 0.19, "learning_rate": 0.0008212091820239251, "loss": 1.3163, "theoretical_loss": 3.4349707883014773, "tokens_seen": 1968308224 }, { "epoch": 0.19, "learning_rate": 0.0008208858713223408, "loss": 1.2591, "theoretical_loss": 3.434894274558495, "tokens_seen": 1968832512 }, { "epoch": 0.19, "learning_rate": 0.0008205625606207565, "loss": 1.3311, "theoretical_loss": 3.434817786891247, "tokens_seen": 1969356800 }, { "epoch": 0.19, "objective/train/advantage_avg": 0.0036946400068700314, "objective/train/docs_used": 1113989, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.5109572410583496, "objective/train/original_loss": 2.5109574794769287, "objective/train/theoretical_loss": 3.4347795528310776, "objective/train/tokens_used": 328478176, "objective/train/value_avg": -0.0240325927734375, "objective/train/value_loss": 0.002512700855731964, "objective/train/value_max": -0.000865936279296875, "objective/train/value_min": -0.444580078125, "objective/train/value_reward_corr": 0.5427498880926339, "objective/train/value_std": 0.0305023193359375, "objective/train/weight_avg": 1.0003818273544312, "objective/train/weighted_lm_loss": 2.5131497383117676, "objective/train/weights_max": 1.0448299646377563, "objective/train/weights_min": 0.9292152523994446, "theoretical_loss": 3.4347795528310776, "tokens_seen": 1969618944 }, { "epoch": 0.19, "learning_rate": 0.0008202392499191723, "loss": 1.3161, "theoretical_loss": 3.434741325283909, "tokens_seen": 1969881088 }, { "epoch": 0.19, "learning_rate": 0.0008199159392175881, "loss": 1.3391, "theoretical_loss": 3.43466488972067, "tokens_seen": 1970405376 }, { "epoch": 0.19, "learning_rate": 0.000819592628516004, "loss": 1.3316, "theoretical_loss": 3.4345884801857323, "tokens_seen": 1970929664 }, { "epoch": 0.19, "objective/train/advantage_avg": 0.0010993743781000376, "objective/train/docs_used": 1114737, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 3.137514591217041, "objective/train/original_loss": 3.13751482963562, "objective/train/theoretical_loss": 3.4345407374368793, "objective/train/tokens_used": 330116576, "objective/train/value_avg": -0.0244598388671875, "objective/train/value_loss": 0.007371141575276852, "objective/train/value_max": -0.00018525123596191406, "objective/train/value_min": -0.62841796875, "objective/train/value_reward_corr": 0.5502403712866853, "objective/train/value_std": 0.030853271484375, "objective/train/weight_avg": 1.0001461505889893, "objective/train/weighted_lm_loss": 3.1376736164093018, "objective/train/weights_max": 1.0552905797958374, "objective/train/weights_min": 0.9124779105186462, "theoretical_loss": 3.4345407374368793, "tokens_seen": 1971257344 }, { "epoch": 0.19, "learning_rate": 0.0008192693178144197, "loss": 1.3211, "theoretical_loss": 3.4345120966633127, "tokens_seen": 1971453952 }, { "epoch": 0.19, "learning_rate": 0.0008189460071128354, "loss": 1.334, "theoretical_loss": 3.4344357391376406, "tokens_seen": 1971978240 }, { "epoch": 0.19, "learning_rate": 0.0008186226964112512, "loss": 1.3392, "theoretical_loss": 3.434359407592961, "tokens_seen": 1972502528 }, { "epoch": 0.19, "objective/train/advantage_avg": 0.009016519412398338, "objective/train/docs_used": 1116152, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.904371976852417, "objective/train/original_loss": 2.904372453689575, "objective/train/theoretical_loss": 3.4343021759750063, "objective/train/tokens_used": 331754976, "objective/train/value_avg": -0.0185699462890625, "objective/train/value_loss": 0.0019734911620616913, "objective/train/value_max": -0.0011119842529296875, "objective/train/value_min": -0.84375, "objective/train/value_reward_corr": 0.48174924023247945, "objective/train/value_std": 0.02508544921875, "objective/train/weight_avg": 1.0009113550186157, "objective/train/weighted_lm_loss": 2.908071279525757, "objective/train/weights_max": 1.0430034399032593, "objective/train/weights_min": 0.9127934575080872, "theoretical_loss": 3.4343021759750063, "tokens_seen": 1972895744 }, { "epoch": 0.19, "learning_rate": 0.000818299385709667, "loss": 1.2939, "theoretical_loss": 3.4342831020135307, "tokens_seen": 1973026816 }, { "epoch": 0.19, "learning_rate": 0.0008179760750080829, "loss": 1.3167, "theoretical_loss": 3.4342068223836213, "tokens_seen": 1973551104 }, { "epoch": 0.19, "learning_rate": 0.0008176527643064986, "loss": 1.3255, "theoretical_loss": 3.434130568687518, "tokens_seen": 1974075392 }, { "epoch": 0.19, "objective/train/advantage_avg": 0.0047695934772491455, "objective/train/docs_used": 1116806, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.6694886684417725, "objective/train/original_loss": 2.6694884300231934, "objective/train/theoretical_loss": 3.43406386796491, "objective/train/tokens_used": 333393376, "objective/train/value_avg": -0.0313720703125, "objective/train/value_loss": 0.0030114990659058094, "objective/train/value_max": -0.0008425712585449219, "objective/train/value_min": -0.434814453125, "objective/train/value_reward_corr": 0.8115514926624109, "objective/train/value_std": 0.052734375, "objective/train/weight_avg": 1.0004918575286865, "objective/train/weighted_lm_loss": 2.670596122741699, "objective/train/weights_max": 1.0374466180801392, "objective/train/weights_min": 0.9565434455871582, "theoretical_loss": 3.43406386796491, "tokens_seen": 1974534144 }, { "epoch": 0.19, "learning_rate": 0.0008173294536049143, "loss": 1.2695, "theoretical_loss": 3.43405434090952, "tokens_seen": 1974599680 }, { "epoch": 0.19, "learning_rate": 0.0008170061429033301, "loss": 1.278, "theoretical_loss": 3.433978139033939, "tokens_seen": 1975123968 }, { "epoch": 0.19, "learning_rate": 0.0008166828322017459, "loss": 1.3197, "theoretical_loss": 3.433901963045101, "tokens_seen": 1975648256 }, { "epoch": 0.19, "objective/train/advantage_avg": 0.014590212143957615, "objective/train/docs_used": 1117380, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.8773200511932373, "objective/train/original_loss": 2.8773205280303955, "objective/train/theoretical_loss": 3.433825812927347, "objective/train/tokens_used": 335031776, "objective/train/value_avg": -0.0203094482421875, "objective/train/value_loss": 0.0013419679598882794, "objective/train/value_max": -0.0006117820739746094, "objective/train/value_min": -0.61767578125, "objective/train/value_reward_corr": 0.41146986325158796, "objective/train/value_std": 0.0272674560546875, "objective/train/weight_avg": 1.0014656782150269, "objective/train/weighted_lm_loss": 2.8822507858276367, "objective/train/weights_max": 1.0452690124511719, "objective/train/weights_min": 0.9249326586723328, "theoretical_loss": 3.433825812927347, "tokens_seen": 1976172544 }, { "epoch": 0.19, "learning_rate": 0.0008163595215001616, "loss": 1.3365, "theoretical_loss": 3.433825812927347, "tokens_seen": 1976172544 }, { "epoch": 0.19, "learning_rate": 0.0008160362107985775, "loss": 1.3309, "theoretical_loss": 3.433749688665029, "tokens_seen": 1976696832 }, { "epoch": 0.19, "learning_rate": 0.0008157129000969933, "loss": 1.3024, "theoretical_loss": 3.433673590242515, "tokens_seen": 1977221120 }, { "epoch": 0.19, "learning_rate": 0.000815389589395409, "loss": 1.3237, "theoretical_loss": 3.433597517644184, "tokens_seen": 1977745408 }, { "epoch": 0.19, "objective/train/advantage_avg": 0.0017665618797764182, "objective/train/docs_used": 1118619, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.9023218154907227, "objective/train/original_loss": 2.9023220539093018, "objective/train/theoretical_loss": 3.433588010384379, "objective/train/tokens_used": 336670176, "objective/train/value_avg": -0.0229339599609375, "objective/train/value_loss": 0.005683680064976215, "objective/train/value_max": -0.0005702972412109375, "objective/train/value_min": -0.87255859375, "objective/train/value_reward_corr": 0.6258814687870408, "objective/train/value_std": 0.057037353515625, "objective/train/weight_avg": 1.0002046823501587, "objective/train/weighted_lm_loss": 2.9033522605895996, "objective/train/weights_max": 1.083462119102478, "objective/train/weights_min": 0.909705400466919, "theoretical_loss": 3.433588010384379, "tokens_seen": 1977810944 }, { "epoch": 0.19, "learning_rate": 0.0008150662786938248, "loss": 1.328, "theoretical_loss": 3.4335214708544326, "tokens_seen": 1978269696 }, { "epoch": 0.19, "learning_rate": 0.0008147429679922405, "loss": 1.3044, "theoretical_loss": 3.4334454498576665, "tokens_seen": 1978793984 }, { "epoch": 0.19, "learning_rate": 0.0008144196572906564, "loss": 1.3159, "theoretical_loss": 3.433369454638308, "tokens_seen": 1979318272 }, { "epoch": 0.19, "objective/train/advantage_avg": 0.009772076271474361, "objective/train/docs_used": 1119354, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.525803565979004, "objective/train/original_loss": 2.525803804397583, "objective/train/theoretical_loss": 3.433350459859364, "objective/train/tokens_used": 338308576, "objective/train/value_avg": -0.01861572265625, "objective/train/value_loss": 0.0023572056088596582, "objective/train/value_max": -0.0006361007690429688, "objective/train/value_min": -0.67626953125, "objective/train/value_reward_corr": 0.36514896340643305, "objective/train/value_std": 0.019317626953125, "objective/train/weight_avg": 1.0009887218475342, "objective/train/weighted_lm_loss": 2.5285255908966064, "objective/train/weights_max": 1.0223592519760132, "objective/train/weights_min": 0.9192579388618469, "theoretical_loss": 3.433350459859364, "tokens_seen": 1979449344 }, { "epoch": 0.19, "learning_rate": 0.0008140963465890722, "loss": 1.2831, "theoretical_loss": 3.4332934851807915, "tokens_seen": 1979842560 }, { "epoch": 0.19, "learning_rate": 0.0008137730358874878, "loss": 1.2775, "theoretical_loss": 3.4332175414695647, "tokens_seen": 1980366848 }, { "epoch": 0.19, "learning_rate": 0.0008134497251859037, "loss": 1.3262, "theoretical_loss": 3.4331416234890906, "tokens_seen": 1980891136 }, { "epoch": 0.19, "objective/train/advantage_avg": 0.010964920744299889, "objective/train/docs_used": 1120511, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.497575283050537, "objective/train/original_loss": 2.497575283050537, "objective/train/theoretical_loss": 3.433113160876953, "objective/train/tokens_used": 339946976, "objective/train/value_avg": -0.017059326171875, "objective/train/value_loss": 0.0014832473825663328, "objective/train/value_max": -0.0006537437438964844, "objective/train/value_min": -0.787109375, "objective/train/value_reward_corr": 0.2923333272442959, "objective/train/value_std": 0.0176239013671875, "objective/train/weight_avg": 1.0011037588119507, "objective/train/weighted_lm_loss": 2.5005812644958496, "objective/train/weights_max": 1.027716875076294, "objective/train/weights_min": 0.9422877430915833, "theoretical_loss": 3.433113160876953, "tokens_seen": 1981087744 }, { "epoch": 0.2, "learning_rate": 0.0008131264144843194, "loss": 1.3386, "theoretical_loss": 3.4330657312238437, "tokens_seen": 1981415424 }, { "epoch": 0.2, "learning_rate": 0.0008128031037827353, "loss": 1.3458, "theoretical_loss": 3.432989864658313, "tokens_seen": 1981939712 }, { "epoch": 0.2, "learning_rate": 0.0008124797930811511, "loss": 1.3462, "theoretical_loss": 3.432914023777001, "tokens_seen": 1982464000 }, { "epoch": 0.2, "objective/train/advantage_avg": 0.011712010018527508, "objective/train/docs_used": 1121670, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.745048761367798, "objective/train/original_loss": 2.7450485229492188, "objective/train/theoretical_loss": 3.432876112963088, "objective/train/tokens_used": 341585376, "objective/train/value_avg": -0.026611328125, "objective/train/value_loss": 0.002689528977498412, "objective/train/value_max": -0.0008897781372070312, "objective/train/value_min": -0.970703125, "objective/train/value_reward_corr": 0.6327339110057676, "objective/train/value_std": 0.053802490234375, "objective/train/weight_avg": 1.0011847019195557, "objective/train/weighted_lm_loss": 2.7490196228027344, "objective/train/weights_max": 1.0853735208511353, "objective/train/weights_min": 0.9105061292648315, "theoretical_loss": 3.432876112963088, "tokens_seen": 1982726144 }, { "epoch": 0.2, "learning_rate": 0.0008121564823795667, "loss": 1.3128, "theoretical_loss": 3.4328382085644233, "tokens_seen": 1982988288 }, { "epoch": 0.2, "learning_rate": 0.0008118331716779826, "loss": 1.3337, "theoretical_loss": 3.4327624190051087, "tokens_seen": 1983512576 }, { "epoch": 0.2, "learning_rate": 0.0008115098609763983, "loss": 1.3272, "theoretical_loss": 3.4326866550836, "tokens_seen": 1984036864 }, { "epoch": 0.2, "objective/train/advantage_avg": 0.007502423133701086, "objective/train/docs_used": 1122413, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.8513760566711426, "objective/train/original_loss": 2.8513758182525635, "objective/train/theoretical_loss": 3.4326393156449924, "objective/train/tokens_used": 343223776, "objective/train/value_avg": -0.017913818359375, "objective/train/value_loss": 0.001874406123533845, "objective/train/value_max": -0.0007700920104980469, "objective/train/value_min": -0.52490234375, "objective/train/value_reward_corr": 0.43204026825274394, "objective/train/value_std": 0.019561767578125, "objective/train/weight_avg": 1.000759482383728, "objective/train/weighted_lm_loss": 2.8541436195373535, "objective/train/weights_max": 1.0328091382980347, "objective/train/weights_min": 0.9301281571388245, "theoretical_loss": 3.4326393156449924, "tokens_seen": 1984364544 }, { "epoch": 0.2, "learning_rate": 0.0008111865502748141, "loss": 1.3718, "theoretical_loss": 3.432610916784453, "tokens_seen": 1984561152 }, { "epoch": 0.2, "learning_rate": 0.00081086323957323, "loss": 1.2912, "theoretical_loss": 3.4325352040922366, "tokens_seen": 1985085440 }, { "epoch": 0.2, "learning_rate": 0.0008105399288716456, "loss": 1.3042, "theoretical_loss": 3.4324595169915337, "tokens_seen": 1985609728 }, { "epoch": 0.2, "objective/train/advantage_avg": 0.0061935000121593475, "objective/train/docs_used": 1123893, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.781351089477539, "objective/train/original_loss": 2.78135085105896, "objective/train/theoretical_loss": 3.432402768451171, "objective/train/tokens_used": 344862176, "objective/train/value_avg": -0.01629638671875, "objective/train/value_loss": 0.0029208620544523, "objective/train/value_max": -0.000263214111328125, "objective/train/value_min": -0.5595703125, "objective/train/value_reward_corr": 0.2688521270924656, "objective/train/value_std": 0.01456451416015625, "objective/train/weight_avg": 1.000633716583252, "objective/train/weighted_lm_loss": 2.783704996109009, "objective/train/weights_max": 1.0114845037460327, "objective/train/weights_min": 0.9174832105636597, "theoretical_loss": 3.432402768451171, "tokens_seen": 1986002944 }, { "epoch": 0.2, "learning_rate": 0.0008102166181700615, "loss": 1.3102, "theoretical_loss": 3.432383855466941, "tokens_seen": 1986134016 }, { "epoch": 0.2, "learning_rate": 0.0008098933074684772, "loss": 1.3364, "theoretical_loss": 3.4323082195030667, "tokens_seen": 1986658304 }, { "epoch": 0.2, "learning_rate": 0.000809569996766893, "loss": 1.36, "theoretical_loss": 3.4322326090845348, "tokens_seen": 1987182592 }, { "epoch": 0.2, "objective/train/advantage_avg": 0.007247402798384428, "objective/train/docs_used": 1124511, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.9051754474639893, "objective/train/original_loss": 2.9051754474639893, "objective/train/theoretical_loss": 3.432166470911403, "objective/train/tokens_used": 346500576, "objective/train/value_avg": -0.037445068359375, "objective/train/value_loss": 0.007051642052829266, "objective/train/value_max": -0.0009326934814453125, "objective/train/value_min": -0.8955078125, "objective/train/value_reward_corr": 0.4457277595635758, "objective/train/value_std": 0.04571533203125, "objective/train/weight_avg": 1.000759482383728, "objective/train/weighted_lm_loss": 2.906989336013794, "objective/train/weights_max": 1.0719449520111084, "objective/train/weights_min": 0.9089248180389404, "theoretical_loss": 3.432166470911403, "tokens_seen": 1987641344 }, { "epoch": 0.2, "learning_rate": 0.0008092466860653088, "loss": 1.3189, "theoretical_loss": 3.4321570241959796, "tokens_seen": 1987706880 }, { "epoch": 0.2, "learning_rate": 0.0008089233753637245, "loss": 1.3282, "theoretical_loss": 3.4320814648220512, "tokens_seen": 1988231168 }, { "epoch": 0.2, "learning_rate": 0.0008086000646621404, "loss": 1.3495, "theoretical_loss": 3.4320059309474127, "tokens_seen": 1988755456 }, { "debugging/Self-BLEU-5": 0.45919922993940515, "debugging/distinct-1-grams": 0.7336741263873471, "debugging/distinct-2-grams": 0.934964573552188, "debugging/entropy-1-grams": 5.750338430717495, "debugging/entropy-2-grams": 6.671726291653881, "debugging/length": 498.25, "debugging/num_segments": 12, "debugging/raw_token_scores_avg": 0.020326539874076843, "debugging/raw_token_scores_std": 0.07355630397796631, "epoch": 0.2, "objective/train/advantage_avg": 0.0093521848320961, "objective/train/docs_used": 1125297, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.4768264293670654, "objective/train/original_loss": 2.4768264293670654, "objective/train/theoretical_loss": 3.4319304225567393, "objective/train/tokens_used": 348138976, "objective/train/value_avg": -0.029693603515625, "objective/train/value_loss": 0.004905423615127802, "objective/train/value_max": -0.0004012584686279297, "objective/train/value_min": -0.9296875, "objective/train/value_reward_corr": 0.3715905734299166, "objective/train/value_std": 0.03961181640625, "objective/train/weight_avg": 1.0009593963623047, "objective/train/weighted_lm_loss": 2.4794206619262695, "objective/train/weights_max": 1.0563794374465942, "objective/train/weights_min": 0.9083547592163086, "theoretical_loss": 3.4319304225567393, "tokens_seen": 1989279744 }, { "epoch": 0.2, "learning_rate": 0.0008082767539605561, "loss": 1.335, "theoretical_loss": 3.4319304225567393, "tokens_seen": 1989279744 }, { "epoch": 0.2, "learning_rate": 0.0008079534432589719, "loss": 1.3045, "theoretical_loss": 3.4318549396347198, "tokens_seen": 1989804032 }, { "epoch": 0.2, "learning_rate": 0.0008076301325573877, "loss": 1.3335, "theoretical_loss": 3.4317794821660565, "tokens_seen": 1990328320 }, { "epoch": 0.2, "learning_rate": 0.0008073068218558034, "loss": 1.3081, "theoretical_loss": 3.4317040501354654, "tokens_seen": 1990852608 }, { "epoch": 0.2, "objective/train/advantage_avg": 0.00752542307600379, "objective/train/docs_used": 1126404, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.5866482257843018, "objective/train/original_loss": 2.5866479873657227, "objective/train/theoretical_loss": 3.4316946229194953, "objective/train/tokens_used": 349777376, "objective/train/value_avg": -0.02001953125, "objective/train/value_loss": 0.0011186557821929455, "objective/train/value_max": -0.0007352828979492188, "objective/train/value_min": -0.241455078125, "objective/train/value_reward_corr": 0.5203756882581094, "objective/train/value_std": 0.025146484375, "objective/train/weight_avg": 1.0007580518722534, "objective/train/weighted_lm_loss": 2.5894386768341064, "objective/train/weights_max": 1.019500732421875, "objective/train/weights_min": 0.9082831740379333, "theoretical_loss": 3.4316946229194953, "tokens_seen": 1990918144 }, { "epoch": 0.2, "learning_rate": 0.0008069835111542192, "loss": 1.2879, "theoretical_loss": 3.4316286435276746, "tokens_seen": 1991376896 }, { "epoch": 0.2, "learning_rate": 0.000806660200452635, "loss": 1.3344, "theoretical_loss": 3.4315532623274265, "tokens_seen": 1991901184 }, { "epoch": 0.2, "learning_rate": 0.0008063368897510508, "loss": 1.335, "theoretical_loss": 3.4314779065194765, "tokens_seen": 1992425472 }, { "epoch": 0.2, "objective/train/advantage_avg": 0.00491110235452652, "objective/train/docs_used": 1126980, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.18711519241333, "objective/train/original_loss": 2.187114953994751, "objective/train/theoretical_loss": 3.43145907153325, "objective/train/tokens_used": 351415776, "objective/train/value_avg": -0.0193634033203125, "objective/train/value_loss": 0.0033079490531235933, "objective/train/value_max": -0.0009508132934570312, "objective/train/value_min": -0.951171875, "objective/train/value_reward_corr": 0.5122830470860366, "objective/train/value_std": 0.03399658203125, "objective/train/weight_avg": 1.0005073547363281, "objective/train/weighted_lm_loss": 2.1885595321655273, "objective/train/weights_max": 1.0667246580123901, "objective/train/weights_min": 0.9120264649391174, "theoretical_loss": 3.43145907153325, "tokens_seen": 1992556544 }, { "epoch": 0.2, "learning_rate": 0.0008060135790494666, "loss": 1.295, "theoretical_loss": 3.4314025760885913, "tokens_seen": 1992949760 }, { "epoch": 0.2, "learning_rate": 0.0008056902683478823, "loss": 1.2799, "theoretical_loss": 3.431327271019553, "tokens_seen": 1993474048 }, { "epoch": 0.2, "learning_rate": 0.000805366957646298, "loss": 1.2878, "theoretical_loss": 3.4312519912971564, "tokens_seen": 1993998336 }, { "epoch": 0.2, "objective/train/advantage_avg": 0.00864596851170063, "objective/train/docs_used": 1128284, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.6948323249816895, "objective/train/original_loss": 2.6948318481445312, "objective/train/theoretical_loss": 3.4312237679328375, "objective/train/tokens_used": 353054176, "objective/train/value_avg": -0.0181427001953125, "objective/train/value_loss": 0.0016371870879083872, "objective/train/value_max": -0.0006117820739746094, "objective/train/value_min": -0.448486328125, "objective/train/value_reward_corr": 0.2343023093411064, "objective/train/value_std": 0.017303466796875, "objective/train/weight_avg": 1.0008727312088013, "objective/train/weighted_lm_loss": 2.698033094406128, "objective/train/weights_max": 1.0238959789276123, "objective/train/weights_min": 0.9340118765830994, "theoretical_loss": 3.4312237679328375, "tokens_seen": 1994194944 }, { "epoch": 0.2, "learning_rate": 0.0008050436469447139, "loss": 1.3316, "theoretical_loss": 3.4311767369062087, "tokens_seen": 1994522624 }, { "epoch": 0.2, "learning_rate": 0.0008047203362431297, "loss": 1.3218, "theoretical_loss": 3.4311015078315314, "tokens_seen": 1995046912 }, { "epoch": 0.2, "learning_rate": 0.0008043970255415454, "loss": 1.2894, "theoretical_loss": 3.4310263040579567, "tokens_seen": 1995571200 }, { "epoch": 0.2, "objective/train/advantage_avg": 0.007215326186269522, "objective/train/docs_used": 1128921, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.819326400756836, "objective/train/original_loss": 2.819326400756836, "objective/train/theoretical_loss": 3.430988711654347, "objective/train/tokens_used": 354692576, "objective/train/value_avg": -0.0223846435546875, "objective/train/value_loss": 0.003470795229077339, "objective/train/value_max": -0.0006070137023925781, "objective/train/value_min": -0.58349609375, "objective/train/value_reward_corr": 0.3818001853453515, "objective/train/value_std": 0.0248565673828125, "objective/train/weight_avg": 1.0007386207580566, "objective/train/weighted_lm_loss": 2.8222203254699707, "objective/train/weights_max": 1.0419864654541016, "objective/train/weights_min": 0.916581392288208, "theoretical_loss": 3.430988711654347, "tokens_seen": 1995833344 }, { "epoch": 0.2, "learning_rate": 0.0008040737148399612, "loss": 1.2994, "theoretical_loss": 3.430951125570332, "tokens_seen": 1996095488 }, { "epoch": 0.2, "learning_rate": 0.000803750404138377, "loss": 1.3133, "theoretical_loss": 3.430875972353518, "tokens_seen": 1996619776 }, { "epoch": 0.2, "learning_rate": 0.0008034270934367928, "loss": 1.2889, "theoretical_loss": 3.430800844392387, "tokens_seen": 1997144064 }, { "epoch": 0.2, "objective/train/advantage_avg": 0.008433956652879715, "objective/train/docs_used": 1129917, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.387760639190674, "objective/train/original_loss": 2.3877604007720947, "objective/train/theoretical_loss": 3.4307539022351152, "objective/train/tokens_used": 356330976, "objective/train/value_avg": -0.015625, "objective/train/value_loss": 0.0014681939501315355, "objective/train/value_max": -0.0008039474487304688, "objective/train/value_min": -0.343017578125, "objective/train/value_reward_corr": 0.21998389911743252, "objective/train/value_std": 0.01561737060546875, "objective/train/weight_avg": 1.0008506774902344, "objective/train/weighted_lm_loss": 2.390482187271118, "objective/train/weights_max": 1.0302027463912964, "objective/train/weights_min": 0.937621533870697, "theoretical_loss": 3.4307539022351152, "tokens_seen": 1997471744 }, { "epoch": 0.21, "learning_rate": 0.0008031037827352086, "loss": 1.3332, "theoretical_loss": 3.430725741671825, "tokens_seen": 1997668352 }, { "epoch": 0.21, "learning_rate": 0.0008027804720336243, "loss": 1.319, "theoretical_loss": 3.4306506641767305, "tokens_seen": 1998192640 }, { "epoch": 0.21, "learning_rate": 0.0008024571613320401, "loss": 1.3295, "theoretical_loss": 3.4305756118920163, "tokens_seen": 1998716928 }, { "epoch": 0.21, "objective/train/advantage_avg": 0.009194849990308285, "objective/train/docs_used": 1130465, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.83764386177063, "objective/train/original_loss": 2.8376431465148926, "objective/train/theoretical_loss": 3.4305193392137228, "objective/train/tokens_used": 357969376, "objective/train/value_avg": -0.01837158203125, "objective/train/value_loss": 0.0028978907503187656, "objective/train/value_max": -0.0004477500915527344, "objective/train/value_min": -0.611328125, "objective/train/value_reward_corr": 0.2953871715960515, "objective/train/value_std": 0.0203704833984375, "objective/train/weight_avg": 1.0009337663650513, "objective/train/weighted_lm_loss": 2.8412535190582275, "objective/train/weights_max": 1.0388432741165161, "objective/train/weights_min": 0.910977303981781, "theoretical_loss": 3.4305193392137228, "tokens_seen": 1999110144 }, { "epoch": 0.21, "learning_rate": 0.0008021338506304558, "loss": 1.3098, "theoretical_loss": 3.4305005848026062, "tokens_seen": 1999241216 }, { "epoch": 0.21, "learning_rate": 0.0008018105399288716, "loss": 1.2904, "theoretical_loss": 3.430425582893439, "tokens_seen": 1999765504 }, { "epoch": 0.21, "learning_rate": 0.0008014872292272875, "loss": 1.3151, "theoretical_loss": 3.430350606149465, "tokens_seen": 2000289792 }, { "epoch": 0.21, "objective/train/advantage_avg": 0.002036882098764181, "objective/train/docs_used": 1131148, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.6366372108459473, "objective/train/original_loss": 2.63663649559021, "objective/train/theoretical_loss": 3.43028502212999, "objective/train/tokens_used": 359607776, "objective/train/value_avg": -0.05303955078125, "objective/train/value_loss": 0.01327978353947401, "objective/train/value_max": -0.0006566047668457031, "objective/train/value_min": -0.98291015625, "objective/train/value_reward_corr": 0.6956969234199766, "objective/train/value_std": 0.1102294921875, "objective/train/weight_avg": 1.000269889831543, "objective/train/weighted_lm_loss": 2.6377968788146973, "objective/train/weights_max": 1.1010630130767822, "objective/train/weights_min": 0.9072142243385315, "theoretical_loss": 3.43028502212999, "tokens_seen": 2000748544 }, { "epoch": 0.21, "learning_rate": 0.0008011639185257032, "loss": 1.3042, "theoretical_loss": 3.4302756545556488, "tokens_seen": 2000814080 }, { "epoch": 0.21, "learning_rate": 0.0008008406078241191, "loss": 1.3271, "theoretical_loss": 3.4302007280969655, "tokens_seen": 2001338368 }, { "epoch": 0.21, "learning_rate": 0.0008005172971225347, "loss": 1.2576, "theoretical_loss": 3.430125826758406, "tokens_seen": 2001862656 }, { "epoch": 0.21, "objective/train/advantage_avg": 0.004553742706775665, "objective/train/docs_used": 1132271, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.9465506076812744, "objective/train/original_loss": 2.9465503692626953, "objective/train/theoretical_loss": 3.4300509505249726, "objective/train/tokens_used": 361246176, "objective/train/value_avg": -0.0367431640625, "objective/train/value_loss": 0.011281573213636875, "objective/train/value_max": -0.0010480880737304688, "objective/train/value_min": -0.99560546875, "objective/train/value_reward_corr": 0.41586561058323857, "objective/train/value_std": 0.06671142578125, "objective/train/weight_avg": 1.0005110502243042, "objective/train/weighted_lm_loss": 2.9502787590026855, "objective/train/weights_max": 1.0581728219985962, "objective/train/weights_min": 0.9085384607315063, "theoretical_loss": 3.4300509505249726, "tokens_seen": 2002386944 }, { "epoch": 0.21, "learning_rate": 0.0008001939864209505, "loss": 1.3042, "theoretical_loss": 3.4300509505249726, "tokens_seen": 2002386944 }, { "epoch": 0.21, "learning_rate": 0.0007998706757193664, "loss": 1.2959, "theoretical_loss": 3.42997609938168, "tokens_seen": 2002911232 }, { "epoch": 0.21, "learning_rate": 0.0007995473650177821, "loss": 1.3391, "theoretical_loss": 3.4299012733135577, "tokens_seen": 2003435520 }, { "epoch": 0.21, "learning_rate": 0.000799224054316198, "loss": 1.3248, "theoretical_loss": 3.429826472305645, "tokens_seen": 2003959808 }, { "epoch": 0.21, "objective/train/advantage_avg": 0.00868127029389143, "objective/train/docs_used": 1132794, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.6986467838287354, "objective/train/original_loss": 2.6986470222473145, "objective/train/theoretical_loss": 3.429817123940958, "objective/train/tokens_used": 362884576, "objective/train/value_avg": -0.0229034423828125, "objective/train/value_loss": 0.005862801801413298, "objective/train/value_max": -0.00064849853515625, "objective/train/value_min": -0.98876953125, "objective/train/value_reward_corr": 0.626960351322974, "objective/train/value_std": 0.061492919921875, "objective/train/weight_avg": 1.00089693069458, "objective/train/weighted_lm_loss": 2.701108455657959, "objective/train/weights_max": 1.0847928524017334, "objective/train/weights_min": 0.9090127348899841, "theoretical_loss": 3.429817123940958, "tokens_seen": 2004025344 }, { "epoch": 0.21, "learning_rate": 0.0007989007436146136, "loss": 1.3467, "theoretical_loss": 3.429751696342997, "tokens_seen": 2004484096 }, { "epoch": 0.21, "learning_rate": 0.0007985774329130294, "loss": 1.3336, "theoretical_loss": 3.429676945410681, "tokens_seen": 2005008384 }, { "epoch": 0.21, "learning_rate": 0.0007982541222114453, "loss": 1.3249, "theoretical_loss": 3.4296022194937748, "tokens_seen": 2005532672 }, { "epoch": 0.21, "objective/train/advantage_avg": 0.009838742204010487, "objective/train/docs_used": 1134045, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.355100154876709, "objective/train/original_loss": 2.355099678039551, "objective/train/theoretical_loss": 3.429583541921459, "objective/train/tokens_used": 364522976, "objective/train/value_avg": -0.0206146240234375, "objective/train/value_loss": 0.002698291325941682, "objective/train/value_max": -0.0008230209350585938, "objective/train/value_min": -0.9716796875, "objective/train/value_reward_corr": 0.5097419146267704, "objective/train/value_std": 0.036407470703125, "objective/train/weight_avg": 1.0009970664978027, "objective/train/weighted_lm_loss": 2.358076572418213, "objective/train/weights_max": 1.0730022192001343, "objective/train/weights_min": 0.9073347449302673, "theoretical_loss": 3.429583541921459, "tokens_seen": 2005663744 }, { "epoch": 0.21, "learning_rate": 0.000797930811509861, "loss": 1.3181, "theoretical_loss": 3.4295275185773715, "tokens_seen": 2006056960 }, { "epoch": 0.21, "learning_rate": 0.0007976075008082768, "loss": 1.3323, "theoretical_loss": 3.429452842646577, "tokens_seen": 2006581248 }, { "epoch": 0.21, "learning_rate": 0.0007972841901066925, "loss": 1.3614, "theoretical_loss": 3.4293781916865083, "tokens_seen": 2007105536 }, { "epoch": 0.21, "objective/train/advantage_avg": 0.009344134479761124, "objective/train/docs_used": 1134700, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.579394578933716, "objective/train/original_loss": 2.5793943405151367, "objective/train/theoretical_loss": 3.4293502040112127, "objective/train/tokens_used": 366161376, "objective/train/value_avg": -0.02801513671875, "objective/train/value_loss": 0.004120124038308859, "objective/train/value_max": -0.00047850608825683594, "objective/train/value_min": -0.751953125, "objective/train/value_reward_corr": 0.46115428348024134, "objective/train/value_std": 0.038421630859375, "objective/train/weight_avg": 1.0009547472000122, "objective/train/weighted_lm_loss": 2.5820555686950684, "objective/train/weights_max": 1.0378410816192627, "objective/train/weights_min": 0.9187839031219482, "theoretical_loss": 3.4293502040112127, "tokens_seen": 2007302144 }, { "epoch": 0.21, "learning_rate": 0.0007969608794051083, "loss": 1.34, "theoretical_loss": 3.429303565682296, "tokens_seen": 2007629824 }, { "epoch": 0.21, "learning_rate": 0.0007966375687035242, "loss": 1.3393, "theoretical_loss": 3.429228964619084, "tokens_seen": 2008154112 }, { "epoch": 0.21, "learning_rate": 0.0007963142580019399, "loss": 1.305, "theoretical_loss": 3.4291543884820275, "tokens_seen": 2008678400 }, { "epoch": 0.21, "objective/train/advantage_avg": 0.00734796654433012, "objective/train/docs_used": 1135755, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.4494006633758545, "objective/train/original_loss": 2.4494004249572754, "objective/train/theoretical_loss": 3.429117109756173, "objective/train/tokens_used": 367799776, "objective/train/value_avg": -0.0195159912109375, "objective/train/value_loss": 0.0025082111824303865, "objective/train/value_max": -0.0005211830139160156, "objective/train/value_min": -0.966796875, "objective/train/value_reward_corr": 0.5501816371092204, "objective/train/value_std": 0.03851318359375, "objective/train/weight_avg": 1.0007470846176147, "objective/train/weighted_lm_loss": 2.4519846439361572, "objective/train/weights_max": 1.0421074628829956, "objective/train/weights_min": 0.9102404117584229, "theoretical_loss": 3.429117109756173, "tokens_seen": 2008940544 }, { "epoch": 0.21, "learning_rate": 0.0007959909473003557, "loss": 1.3275, "theoretical_loss": 3.429079837256296, "tokens_seen": 2009202688 }, { "epoch": 0.21, "learning_rate": 0.0007956676365987714, "loss": 1.3239, "theoretical_loss": 3.429005310927071, "tokens_seen": 2009726976 }, { "epoch": 0.21, "learning_rate": 0.0007953443258971872, "loss": 1.3691, "theoretical_loss": 3.4289308094795463, "tokens_seen": 2010251264 }, { "epoch": 0.21, "objective/train/advantage_avg": 0.01097910851240158, "objective/train/docs_used": 1136767, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.548217296600342, "objective/train/original_loss": 2.5482168197631836, "objective/train/theoretical_loss": 3.428884258703508, "objective/train/tokens_used": 369438176, "objective/train/value_avg": -0.01471710205078125, "objective/train/value_loss": 0.000262704910710454, "objective/train/value_max": -0.0006265640258789062, "objective/train/value_min": -0.24072265625, "objective/train/value_reward_corr": 0.236570649419404, "objective/train/value_std": 0.01114654541015625, "objective/train/weight_avg": 1.0010992288589478, "objective/train/weighted_lm_loss": 2.551769256591797, "objective/train/weights_max": 1.018768310546875, "objective/train/weights_min": 0.9921634793281555, "theoretical_loss": 3.428884258703508, "tokens_seen": 2010578944 }, { "epoch": 0.21, "learning_rate": 0.0007950210151956029, "loss": 1.3496, "theoretical_loss": 3.4288563328989285, "tokens_seen": 2010775552 }, { "epoch": 0.21, "learning_rate": 0.0007946977044940188, "loss": 1.3693, "theoretical_loss": 3.428781881170438, "tokens_seen": 2011299840 }, { "epoch": 0.21, "learning_rate": 0.0007943743937924346, "loss": 1.3289, "theoretical_loss": 3.428707454279305, "tokens_seen": 2011824128 }, { "epoch": 0.21, "objective/train/advantage_avg": 0.012145849876105785, "objective/train/docs_used": 1137326, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.619305372238159, "objective/train/original_loss": 2.6193056106567383, "objective/train/theoretical_loss": 3.428651650401596, "objective/train/tokens_used": 371076576, "objective/train/value_avg": -0.0198974609375, "objective/train/value_loss": 0.0032746356446295977, "objective/train/value_max": -0.001201629638671875, "objective/train/value_min": -0.98828125, "objective/train/value_reward_corr": 0.40681656024222207, "objective/train/value_std": 0.0283050537109375, "objective/train/weight_avg": 1.0012305974960327, "objective/train/weighted_lm_loss": 2.6225292682647705, "objective/train/weights_max": 1.039466142654419, "objective/train/weights_min": 0.9109441041946411, "theoretical_loss": 3.428651650401596, "tokens_seen": 2012217344 }, { "epoch": 0.21, "learning_rate": 0.0007940510830908503, "loss": 1.3301, "theoretical_loss": 3.4286330522107766, "tokens_seen": 2012348416 }, { "epoch": 0.21, "learning_rate": 0.0007937277723892661, "loss": 1.374, "theoretical_loss": 3.4285586749501085, "tokens_seen": 2012872704 }, { "epoch": 0.21, "learning_rate": 0.0007934044616876818, "loss": 1.3737, "theoretical_loss": 3.4284843224825714, "tokens_seen": 2013396992 }, { "epoch": 0.21, "objective/train/advantage_avg": 0.006908084731549025, "objective/train/docs_used": 1138753, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.8655903339385986, "objective/train/original_loss": 2.8655900955200195, "objective/train/theoretical_loss": 3.4284192844000208, "objective/train/tokens_used": 372714976, "objective/train/value_avg": -0.041168212890625, "objective/train/value_loss": 0.005510091315954924, "objective/train/value_max": -0.00047278404235839844, "objective/train/value_min": -0.98388671875, "objective/train/value_reward_corr": 0.8098382196546917, "objective/train/value_std": 0.0947265625, "objective/train/weight_avg": 1.0007179975509644, "objective/train/weighted_lm_loss": 2.8679168224334717, "objective/train/weights_max": 1.0945748090744019, "objective/train/weights_min": 0.9099749326705933, "theoretical_loss": 3.4284192844000208, "tokens_seen": 2013855744 }, { "epoch": 0.22, "learning_rate": 0.0007930811509860977, "loss": 1.3529, "theoretical_loss": 3.428409994793447, "tokens_seen": 2013921280 }, { "epoch": 0.22, "learning_rate": 0.0007927578402845135, "loss": 1.3662, "theoretical_loss": 3.428335691868031, "tokens_seen": 2014445568 }, { "epoch": 0.22, "learning_rate": 0.0007924345295829291, "loss": 1.3878, "theoretical_loss": 3.4282614136916307, "tokens_seen": 2014969856 }, { "epoch": 0.22, "objective/train/advantage_avg": 0.0127880172803998, "objective/train/docs_used": 1139429, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.5890555381774902, "objective/train/original_loss": 2.5890562534332275, "objective/train/theoretical_loss": 3.428187160249567, "objective/train/tokens_used": 374353376, "objective/train/value_avg": -0.0203094482421875, "objective/train/value_loss": 0.0010899299522861838, "objective/train/value_max": -0.000820159912109375, "objective/train/value_min": -0.26953125, "objective/train/value_reward_corr": 0.29170655628896963, "objective/train/value_std": 0.017181396484375, "objective/train/weight_avg": 1.0012842416763306, "objective/train/weighted_lm_loss": 2.5927858352661133, "objective/train/weights_max": 1.019420862197876, "objective/train/weights_min": 0.9544306397438049, "theoretical_loss": 3.428187160249567, "tokens_seen": 2015494144 }, { "epoch": 0.22, "learning_rate": 0.000792111218881345, "loss": 1.3689, "theoretical_loss": 3.428187160249567, "tokens_seen": 2015494144 }, { "epoch": 0.22, "learning_rate": 0.0007917879081797607, "loss": 1.3274, "theoretical_loss": 3.428112931527171, "tokens_seen": 2016018432 }, { "epoch": 0.22, "learning_rate": 0.0007914645974781766, "loss": 1.3295, "theoretical_loss": 3.428038727509789, "tokens_seen": 2016542720 }, { "epoch": 0.22, "learning_rate": 0.0007911412867765924, "loss": 1.3484, "theoretical_loss": 3.427964548182779, "tokens_seen": 2017067008 }, { "epoch": 0.22, "objective/train/advantage_avg": 0.011586609296500683, "objective/train/docs_used": 1140637, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 3.1325912475585938, "objective/train/original_loss": 3.1325907707214355, "objective/train/theoretical_loss": 3.4279552775022157, "objective/train/tokens_used": 375991776, "objective/train/value_avg": -0.037078857421875, "objective/train/value_loss": 0.002670668065547943, "objective/train/value_max": -0.000865936279296875, "objective/train/value_min": -0.59619140625, "objective/train/value_reward_corr": 0.8204117531581767, "objective/train/value_std": 0.0797119140625, "objective/train/weight_avg": 1.0011720657348633, "objective/train/weighted_lm_loss": 3.137453317642212, "objective/train/weights_max": 1.055580735206604, "objective/train/weights_min": 0.955090343952179, "theoretical_loss": 3.4279552775022157, "tokens_seen": 2017132544 }, { "epoch": 0.22, "learning_rate": 0.000790817976075008, "loss": 1.3624, "theoretical_loss": 3.42789039353151, "tokens_seen": 2017591296 }, { "epoch": 0.22, "learning_rate": 0.0007904946653734239, "loss": 1.3639, "theoretical_loss": 3.4278162635413656, "tokens_seen": 2018115584 }, { "epoch": 0.22, "learning_rate": 0.0007901713546718396, "loss": 1.323, "theoretical_loss": 3.4277421581977405, "tokens_seen": 2018639872 }, { "epoch": 0.22, "objective/train/advantage_avg": 0.005529832560569048, "objective/train/docs_used": 1141300, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 3.1447408199310303, "objective/train/original_loss": 3.144740581512451, "objective/train/theoretical_loss": 3.4277236357111427, "objective/train/tokens_used": 377630176, "objective/train/value_avg": -0.0384521484375, "objective/train/value_loss": 0.005796557292342186, "objective/train/value_max": -0.00042724609375, "objective/train/value_min": -0.99169921875, "objective/train/value_reward_corr": 0.8686318343959792, "objective/train/value_std": 0.1064453125, "objective/train/weight_avg": 1.0005813837051392, "objective/train/weighted_lm_loss": 3.146066904067993, "objective/train/weights_max": 1.0881203413009644, "objective/train/weights_min": 0.9076492190361023, "theoretical_loss": 3.4277236357111427, "tokens_seen": 2018770944 }, { "epoch": 0.22, "learning_rate": 0.0007898480439702555, "loss": 1.3342, "theoretical_loss": 3.4276680774860426, "tokens_seen": 2019164160 }, { "epoch": 0.22, "learning_rate": 0.0007895247332686713, "loss": 1.3589, "theoretical_loss": 3.427594021391691, "tokens_seen": 2019688448 }, { "epoch": 0.22, "learning_rate": 0.0007892014225670869, "loss": 1.3269, "theoretical_loss": 3.4275199899001185, "tokens_seen": 2020212736 }, { "epoch": 0.22, "objective/train/advantage_avg": 0.004466152749955654, "objective/train/docs_used": 1142665, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.5924227237701416, "objective/train/original_loss": 2.5924222469329834, "objective/train/theoretical_loss": 3.427492234430712, "objective/train/tokens_used": 379268576, "objective/train/value_avg": -0.039337158203125, "objective/train/value_loss": 0.004787075333297253, "objective/train/value_max": -0.00070953369140625, "objective/train/value_min": -0.8408203125, "objective/train/value_reward_corr": 0.8385595825887284, "objective/train/value_std": 0.09552001953125, "objective/train/weight_avg": 1.0004702806472778, "objective/train/weighted_lm_loss": 2.5934784412384033, "objective/train/weights_max": 1.0609867572784424, "objective/train/weights_min": 0.9210997819900513, "theoretical_loss": 3.427492234430712, "tokens_seen": 2020409344 }, { "epoch": 0.22, "learning_rate": 0.0007888781118655028, "loss": 1.3509, "theoretical_loss": 3.4274459829967703, "tokens_seen": 2020737024 }, { "epoch": 0.22, "learning_rate": 0.0007885548011639185, "loss": 1.3521, "theoretical_loss": 3.427372000667103, "tokens_seen": 2021261312 }, { "epoch": 0.22, "learning_rate": 0.0007882314904623343, "loss": 1.3384, "theoretical_loss": 3.427298042896586, "tokens_seen": 2021785600 }, { "epoch": 0.22, "objective/train/advantage_avg": 0.010508273728191853, "objective/train/docs_used": 1143239, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 3.0476982593536377, "objective/train/original_loss": 3.0476982593536377, "objective/train/theoretical_loss": 3.4272610732164717, "objective/train/tokens_used": 380906976, "objective/train/value_avg": -0.016357421875, "objective/train/value_loss": 0.0008057131781242788, "objective/train/value_max": -0.0006694793701171875, "objective/train/value_min": -0.432373046875, "objective/train/value_reward_corr": 0.3959332526082542, "objective/train/value_std": 0.017974853515625, "objective/train/weight_avg": 1.0010548830032349, "objective/train/weighted_lm_loss": 3.0517499446868896, "objective/train/weights_max": 1.0284278392791748, "objective/train/weights_min": 0.9650006890296936, "theoretical_loss": 3.4272610732164717, "tokens_seen": 2022047744 }, { "epoch": 0.22, "learning_rate": 0.0007879081797607502, "loss": 1.3406, "theoretical_loss": 3.4272241096707012, "tokens_seen": 2022309888 }, { "epoch": 0.22, "learning_rate": 0.0007875848690591658, "loss": 1.3463, "theoretical_loss": 3.427150200974943, "tokens_seen": 2022834176 }, { "epoch": 0.22, "learning_rate": 0.0007872615583575817, "loss": 1.3435, "theoretical_loss": 3.4270763167948184, "tokens_seen": 2023358464 }, { "epoch": 0.22, "objective/train/advantage_avg": 0.005469959229230881, "objective/train/docs_used": 1144661, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.2179784774780273, "objective/train/original_loss": 2.2179782390594482, "objective/train/theoretical_loss": 3.42703015162515, "objective/train/tokens_used": 382545376, "objective/train/value_avg": -0.01555633544921875, "objective/train/value_loss": 0.002250698395073414, "objective/train/value_max": -0.0005316734313964844, "objective/train/value_min": -0.97216796875, "objective/train/value_reward_corr": 0.431912018446703, "objective/train/value_std": 0.02618408203125, "objective/train/weight_avg": 1.0005580186843872, "objective/train/weighted_lm_loss": 2.2199788093566895, "objective/train/weights_max": 1.0479637384414673, "objective/train/weights_min": 0.9075499176979065, "theoretical_loss": 3.42703015162515, "tokens_seen": 2023686144 }, { "epoch": 0.22, "learning_rate": 0.0007869382476559974, "loss": 1.3482, "theoretical_loss": 3.427002457115845, "tokens_seen": 2023882752 }, { "epoch": 0.22, "learning_rate": 0.0007866149369544132, "loss": 1.3299, "theoretical_loss": 3.4269286219235555, "tokens_seen": 2024407040 }, { "epoch": 0.22, "learning_rate": 0.000786291626252829, "loss": 1.3258, "theoretical_loss": 3.4268548112034916, "tokens_seen": 2024931328 }, { "epoch": 0.22, "objective/train/advantage_avg": 0.00924660637974739, "objective/train/docs_used": 1145363, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.7543082237243652, "objective/train/original_loss": 2.754307985305786, "objective/train/theoretical_loss": 3.4267994692146537, "objective/train/tokens_used": 384183776, "objective/train/value_avg": -0.0290985107421875, "objective/train/value_loss": 0.005657609086483717, "objective/train/value_max": -0.0007886886596679688, "objective/train/value_min": -0.9677734375, "objective/train/value_reward_corr": 0.6020099467996785, "objective/train/value_std": 0.05255126953125, "objective/train/weight_avg": 1.0009524822235107, "objective/train/weighted_lm_loss": 2.757852792739868, "objective/train/weights_max": 1.0600982904434204, "objective/train/weights_min": 0.9102350473403931, "theoretical_loss": 3.4267994692146537, "tokens_seen": 2025324544 }, { "epoch": 0.22, "learning_rate": 0.0007859683155512448, "loss": 1.3245, "theoretical_loss": 3.426781024941211, "tokens_seen": 2025455616 }, { "epoch": 0.22, "learning_rate": 0.0007856450048496605, "loss": 1.3285, "theoretical_loss": 3.42670726312228, "tokens_seen": 2025979904 }, { "epoch": 0.22, "learning_rate": 0.0007853216941480763, "loss": 1.3207, "theoretical_loss": 3.4266335257322798, "tokens_seen": 2026504192 }, { "epoch": 0.22, "objective/train/advantage_avg": 0.005987908691167831, "objective/train/docs_used": 1146594, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.9548356533050537, "objective/train/original_loss": 2.954835891723633, "objective/train/theoretical_loss": 3.42656902554406, "objective/train/tokens_used": 385822176, "objective/train/value_avg": -0.0274658203125, "objective/train/value_loss": 0.003132613841444254, "objective/train/value_max": -0.0010166168212890625, "objective/train/value_min": -0.3876953125, "objective/train/value_reward_corr": 0.7207891848997005, "objective/train/value_std": 0.04248046875, "objective/train/weight_avg": 1.0006142854690552, "objective/train/weighted_lm_loss": 2.9564149379730225, "objective/train/weights_max": 1.0241276025772095, "objective/train/weights_min": 0.9428431987762451, "theoretical_loss": 3.42656902554406, "tokens_seen": 2026962944 }, { "epoch": 0.22, "learning_rate": 0.0007849983834464921, "loss": 1.3483, "theoretical_loss": 3.4265598127568024, "tokens_seen": 2027028480 }, { "epoch": 0.22, "learning_rate": 0.000784675072744908, "loss": 1.3551, "theoretical_loss": 3.4264861241814524, "tokens_seen": 2027552768 }, { "epoch": 0.22, "learning_rate": 0.0007843517620433237, "loss": 1.4161, "theoretical_loss": 3.4264124599918473, "tokens_seen": 2028077056 }, { "epoch": 0.22, "objective/train/advantage_avg": 0.010803976096212864, "objective/train/docs_used": 1147311, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.5807459354400635, "objective/train/original_loss": 2.5807454586029053, "objective/train/theoretical_loss": 3.4263388201736156, "objective/train/tokens_used": 387460576, "objective/train/value_avg": -0.0212249755859375, "objective/train/value_loss": 0.0031255141366273165, "objective/train/value_max": -0.0007853507995605469, "objective/train/value_min": -0.90234375, "objective/train/value_reward_corr": 0.3779798280013806, "objective/train/value_std": 0.03216552734375, "objective/train/weight_avg": 1.0010958909988403, "objective/train/weighted_lm_loss": 2.584089756011963, "objective/train/weights_max": 1.083680510520935, "objective/train/weights_min": 0.908164918422699, "theoretical_loss": 3.4263388201736156, "tokens_seen": 2028601344 }, { "epoch": 0.22, "learning_rate": 0.0007840284513417394, "loss": 1.3501, "theoretical_loss": 3.4263388201736156, "tokens_seen": 2028601344 }, { "epoch": 0.22, "learning_rate": 0.0007837051406401552, "loss": 1.338, "theoretical_loss": 3.426265204712399, "tokens_seen": 2029125632 }, { "epoch": 0.22, "learning_rate": 0.000783381829938571, "loss": 1.3342, "theoretical_loss": 3.42619161359385, "tokens_seen": 2029649920 }, { "epoch": 0.22, "learning_rate": 0.0007830585192369867, "loss": 1.3485, "theoretical_loss": 3.4261180468036354, "tokens_seen": 2030174208 }, { "epoch": 0.22, "objective/train/advantage_avg": 0.010922583751380444, "objective/train/docs_used": 1148399, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.8256478309631348, "objective/train/original_loss": 2.8256473541259766, "objective/train/theoretical_loss": 3.426108852664731, "objective/train/tokens_used": 389098976, "objective/train/value_avg": -0.0267181396484375, "objective/train/value_loss": 0.0031525518279522657, "objective/train/value_max": -0.000720977783203125, "objective/train/value_min": -0.485107421875, "objective/train/value_reward_corr": 0.5602706424114954, "objective/train/value_std": 0.042022705078125, "objective/train/weight_avg": 1.0011078119277954, "objective/train/weighted_lm_loss": 2.8297269344329834, "objective/train/weights_max": 1.0439242124557495, "objective/train/weights_min": 0.9254735112190247, "theoretical_loss": 3.426108852664731, "tokens_seen": 2030239744 }, { "epoch": 0.23, "learning_rate": 0.0007827352085354026, "loss": 1.3447, "theoretical_loss": 3.4260445043274323, "tokens_seen": 2030698496 }, { "epoch": 0.23, "learning_rate": 0.0007824118978338183, "loss": 1.3434, "theoretical_loss": 3.4259709861509307, "tokens_seen": 2031222784 }, { "epoch": 0.23, "learning_rate": 0.0007820885871322341, "loss": 1.3633, "theoretical_loss": 3.4258974922598324, "tokens_seen": 2031747072 }, { "epoch": 0.23, "objective/train/advantage_avg": 0.012492392212152481, "objective/train/docs_used": 1149081, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.667729616165161, "objective/train/original_loss": 2.6677298545837402, "objective/train/theoretical_loss": 3.4258791225799774, "objective/train/tokens_used": 390737376, "objective/train/value_avg": -0.0211334228515625, "objective/train/value_loss": 0.0010215212823823094, "objective/train/value_max": -0.00142669677734375, "objective/train/value_min": -0.262451171875, "objective/train/value_reward_corr": 0.2824109081173769, "objective/train/value_std": 0.0186309814453125, "objective/train/weight_avg": 1.0012543201446533, "objective/train/weighted_lm_loss": 2.6716959476470947, "objective/train/weights_max": 1.0259498357772827, "objective/train/weights_min": 0.9373342394828796, "theoretical_loss": 3.4258791225799774, "tokens_seen": 2031878144 }, { "epoch": 0.23, "learning_rate": 0.0007817652764306499, "loss": 1.3831, "theoretical_loss": 3.4258240226398513, "tokens_seen": 2032271360 }, { "epoch": 0.23, "learning_rate": 0.0007814419657290656, "loss": 1.3321, "theoretical_loss": 3.4257505772767143, "tokens_seen": 2032795648 }, { "epoch": 0.23, "learning_rate": 0.0007811186550274815, "loss": 1.3088, "theoretical_loss": 3.425677156156159, "tokens_seen": 2033319936 }, { "epoch": 0.23, "objective/train/advantage_avg": 0.01288179587572813, "objective/train/docs_used": 1149676, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.5947186946868896, "objective/train/original_loss": 2.5947184562683105, "objective/train/theoretical_loss": 3.4256496294830825, "objective/train/tokens_used": 392375776, "objective/train/value_avg": -0.0182952880859375, "objective/train/value_loss": 0.0021010302007198334, "objective/train/value_max": -0.0005931854248046875, "objective/train/value_min": -0.5576171875, "objective/train/value_reward_corr": 0.24004139696232982, "objective/train/value_std": 0.0212554931640625, "objective/train/weight_avg": 1.0012985467910767, "objective/train/weighted_lm_loss": 2.5985684394836426, "objective/train/weights_max": 1.0503945350646973, "objective/train/weights_min": 0.9152799248695374, "theoretical_loss": 3.4256496294830825, "tokens_seen": 2033516544 }, { "epoch": 0.23, "learning_rate": 0.0007807953443258971, "loss": 1.3158, "theoretical_loss": 3.425603759263936, "tokens_seen": 2033844224 }, { "epoch": 0.23, "learning_rate": 0.000780472033624313, "loss": 1.3381, "theoretical_loss": 3.425530386585807, "tokens_seen": 2034368512 }, { "epoch": 0.23, "learning_rate": 0.0007801487229227288, "loss": 1.3137, "theoretical_loss": 3.425457038107547, "tokens_seen": 2034892800 }, { "epoch": 0.23, "objective/train/advantage_avg": 0.006807017605751753, "objective/train/docs_used": 1150523, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.4921724796295166, "objective/train/original_loss": 2.4921722412109375, "objective/train/theoretical_loss": 3.4254203729389254, "objective/train/tokens_used": 394014176, "objective/train/value_avg": -0.0283966064453125, "objective/train/value_loss": 0.0036921396385878325, "objective/train/value_max": -0.0008263587951660156, "objective/train/value_min": -0.623046875, "objective/train/value_reward_corr": 0.3278116441665009, "objective/train/value_std": 0.043212890625, "objective/train/weight_avg": 1.0006990432739258, "objective/train/weighted_lm_loss": 2.4964866638183594, "objective/train/weights_max": 1.0403544902801514, "objective/train/weights_min": 0.9216977953910828, "theoretical_loss": 3.4254203729389254, "tokens_seen": 2035154944 }, { "epoch": 0.23, "learning_rate": 0.0007798254122211445, "loss": 1.3164, "theoretical_loss": 3.4253837138149423, "tokens_seen": 2035417088 }, { "epoch": 0.23, "learning_rate": 0.0007795021015195604, "loss": 1.3188, "theoretical_loss": 3.425310413693791, "tokens_seen": 2035941376 }, { "epoch": 0.23, "learning_rate": 0.000779178790817976, "loss": 1.3247, "theoretical_loss": 3.4252371377299036, "tokens_seen": 2036465664 }, { "epoch": 0.23, "objective/train/advantage_avg": 0.0014161600265651941, "objective/train/docs_used": 1151732, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.6390540599823, "objective/train/original_loss": 2.639054298400879, "objective/train/theoretical_loss": 3.4251913525135347, "objective/train/tokens_used": 395652576, "objective/train/value_avg": -0.029571533203125, "objective/train/value_loss": 0.003698627697303891, "objective/train/value_max": -0.0007791519165039062, "objective/train/value_min": -0.51318359375, "objective/train/value_reward_corr": 0.8270101776619015, "objective/train/value_std": 0.05596923828125, "objective/train/weight_avg": 1.0001599788665771, "objective/train/weighted_lm_loss": 2.6395108699798584, "objective/train/weights_max": 1.0353420972824097, "objective/train/weights_min": 0.9492146968841553, "theoretical_loss": 3.4251913525135347, "tokens_seen": 2036793344 }, { "epoch": 0.23, "learning_rate": 0.0007788554801163918, "loss": 1.2798, "theoretical_loss": 3.425163885909103, "tokens_seen": 2036989952 }, { "epoch": 0.23, "learning_rate": 0.0007785321694148077, "loss": 1.3236, "theoretical_loss": 3.4250906582172225, "tokens_seen": 2037514240 }, { "epoch": 0.23, "learning_rate": 0.0007782088587132234, "loss": 1.3554, "theoretical_loss": 3.425017454640109, "tokens_seen": 2038038528 }, { "epoch": 0.23, "objective/train/advantage_avg": 0.009865707717835903, "objective/train/docs_used": 1152471, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.7175018787384033, "objective/train/original_loss": 2.717501640319824, "objective/train/theoretical_loss": 3.4249625677740823, "objective/train/tokens_used": 397290976, "objective/train/value_avg": -0.01392364501953125, "objective/train/value_loss": 0.0007469551637768745, "objective/train/value_max": -0.0006022453308105469, "objective/train/value_min": -0.2359619140625, "objective/train/value_reward_corr": 0.166050630471824, "objective/train/value_std": 0.011505126953125, "objective/train/weight_avg": 1.0009902715682983, "objective/train/weighted_lm_loss": 2.720762252807617, "objective/train/weights_max": 1.0156663656234741, "objective/train/weights_min": 0.9450311660766602, "theoretical_loss": 3.4249625677740823, "tokens_seen": 2038431744 }, { "epoch": 0.23, "learning_rate": 0.0007778855480116393, "loss": 1.3498, "theoretical_loss": 3.4249442751636208, "tokens_seen": 2038562816 }, { "epoch": 0.23, "learning_rate": 0.0007775622373100549, "loss": 1.3382, "theoretical_loss": 3.424871119773628, "tokens_seen": 2039087104 }, { "epoch": 0.23, "learning_rate": 0.0007772389266084707, "loss": 1.3177, "theoretical_loss": 3.4247979884560125, "tokens_seen": 2039611392 }, { "epoch": 0.23, "objective/train/advantage_avg": 0.012461480684578419, "objective/train/docs_used": 1153876, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 3.239927053451538, "objective/train/original_loss": 3.239926815032959, "objective/train/theoretical_loss": 3.4247340182888815, "objective/train/tokens_used": 398929376, "objective/train/value_avg": -0.0214385986328125, "objective/train/value_loss": 0.002763229189440608, "objective/train/value_max": -0.0009326934814453125, "objective/train/value_min": -0.94775390625, "objective/train/value_reward_corr": 0.46938232690008613, "objective/train/value_std": 0.03466796875, "objective/train/weight_avg": 1.0012598037719727, "objective/train/weighted_lm_loss": 3.244401693344116, "objective/train/weights_max": 1.0748834609985352, "objective/train/weights_min": 0.9228560924530029, "theoretical_loss": 3.4247340182888815, "tokens_seen": 2040070144 }, { "epoch": 0.23, "learning_rate": 0.0007769156159068866, "loss": 1.3264, "theoretical_loss": 3.424724881196668, "tokens_seen": 2040135680 }, { "epoch": 0.23, "learning_rate": 0.0007765923052053023, "loss": 1.2754, "theoretical_loss": 3.424651797981501, "tokens_seen": 2040659968 }, { "epoch": 0.23, "learning_rate": 0.0007762689945037181, "loss": 1.3245, "theoretical_loss": 3.4245787387964293, "tokens_seen": 2041184256 }, { "epoch": 0.23, "objective/train/advantage_avg": -0.00196394813247025, "objective/train/docs_used": 1154501, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 3.0751397609710693, "objective/train/original_loss": 3.075139284133911, "objective/train/theoretical_loss": 3.424505703627382, "objective/train/tokens_used": 400567776, "objective/train/value_avg": -0.037841796875, "objective/train/value_loss": 0.016913242638111115, "objective/train/value_max": -0.000904083251953125, "objective/train/value_min": -0.99853515625, "objective/train/value_reward_corr": 0.5246572961536398, "objective/train/value_std": 0.09954833984375, "objective/train/weight_avg": 0.9998869895935059, "objective/train/weighted_lm_loss": 3.074571371078491, "objective/train/weights_max": 1.1027320623397827, "objective/train/weights_min": 0.9098876118659973, "theoretical_loss": 3.424505703627382, "tokens_seen": 2041708544 }, { "epoch": 0.23, "learning_rate": 0.0007759456838021338, "loss": 1.3403, "theoretical_loss": 3.424505703627382, "tokens_seen": 2041708544 }, { "epoch": 0.23, "learning_rate": 0.0007756223731005496, "loss": 1.3475, "theoretical_loss": 3.4244326924603, "tokens_seen": 2042232832 }, { "epoch": 0.23, "learning_rate": 0.0007752990623989655, "loss": 1.32, "theoretical_loss": 3.424359705281138, "tokens_seen": 2042757120 }, { "epoch": 0.23, "learning_rate": 0.0007749757516973812, "loss": 1.334, "theoretical_loss": 3.42428674207586, "tokens_seen": 2043281408 }, { "epoch": 0.23, "objective/train/advantage_avg": 0.00885568093508482, "objective/train/docs_used": 1155127, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.216031551361084, "objective/train/original_loss": 2.216031312942505, "objective/train/theoretical_loss": 3.4242776233601653, "objective/train/tokens_used": 402206176, "objective/train/value_avg": -0.01497650146484375, "objective/train/value_loss": 0.0004260502755641937, "objective/train/value_max": -0.0010242462158203125, "objective/train/value_min": -0.1788330078125, "objective/train/value_reward_corr": 0.35892077898765157, "objective/train/value_std": 0.01175689697265625, "objective/train/weight_avg": 1.0008876323699951, "objective/train/weighted_lm_loss": 2.2186853885650635, "objective/train/weights_max": 1.0127300024032593, "objective/train/weights_min": 0.9310997128486633, "theoretical_loss": 3.4242776233601653, "tokens_seen": 2043346944 }, { "epoch": 0.23, "learning_rate": 0.000774652440995797, "loss": 1.3785, "theoretical_loss": 3.424213802830443, "tokens_seen": 2043805696 }, { "epoch": 0.23, "learning_rate": 0.0007743291302942127, "loss": 1.3444, "theoretical_loss": 3.4241408875308768, "tokens_seen": 2044329984 }, { "epoch": 0.23, "learning_rate": 0.0007740058195926285, "loss": 1.3298, "theoretical_loss": 3.42406799616316, "tokens_seen": 2044854272 }, { "epoch": 0.23, "objective/train/advantage_avg": -0.023724814876914024, "objective/train/docs_used": 1156711, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.555878162384033, "objective/train/original_loss": 2.5558784008026123, "objective/train/theoretical_loss": 3.4240497770589435, "objective/train/tokens_used": 403844576, "objective/train/value_avg": -0.0313720703125, "objective/train/value_loss": 0.019432753324508667, "objective/train/value_max": -0.0006718635559082031, "objective/train/value_min": -0.97705078125, "objective/train/value_reward_corr": 0.8064561565681317, "objective/train/value_std": 0.0540771484375, "objective/train/weight_avg": 0.9977230429649353, "objective/train/weighted_lm_loss": 2.5559499263763428, "objective/train/weights_max": 1.0440746545791626, "objective/train/weights_min": 0.9126288294792175, "theoretical_loss": 3.4240497770589435, "tokens_seen": 2044985344 }, { "epoch": 0.23, "learning_rate": 0.0007736825088910443, "loss": 1.3361, "theoretical_loss": 3.423995128713307, "tokens_seen": 2045378560 }, { "epoch": 0.23, "learning_rate": 0.0007733591981894601, "loss": 1.347, "theoretical_loss": 3.4239222851673397, "tokens_seen": 2045902848 }, { "epoch": 0.23, "learning_rate": 0.0007730358874878759, "loss": 1.3569, "theoretical_loss": 3.423849465511296, "tokens_seen": 2046427136 }, { "epoch": 0.23, "objective/train/advantage_avg": 0.009622104465961456, "objective/train/docs_used": 1157531, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.3308236598968506, "objective/train/original_loss": 2.3308234214782715, "objective/train/theoretical_loss": 3.423822164296552, "objective/train/tokens_used": 405482976, "objective/train/value_avg": -0.0172882080078125, "objective/train/value_loss": 0.0010615058708935976, "objective/train/value_max": -0.000507354736328125, "objective/train/value_min": -0.36328125, "objective/train/value_reward_corr": 0.22851459074495234, "objective/train/value_std": 0.01461029052734375, "objective/train/weight_avg": 1.0009675025939941, "objective/train/weighted_lm_loss": 2.3333802223205566, "objective/train/weights_max": 1.0163381099700928, "objective/train/weights_min": 0.9310746192932129, "theoretical_loss": 3.423822164296552, "tokens_seen": 2046623744 }, { "epoch": 0.24, "learning_rate": 0.0007727125767862916, "loss": 1.3588, "theoretical_loss": 3.4237766697312213, "tokens_seen": 2046951424 }, { "epoch": 0.24, "learning_rate": 0.0007723892660847074, "loss": 1.3181, "theoretical_loss": 3.4237038978131764, "tokens_seen": 2047475712 }, { "epoch": 0.24, "learning_rate": 0.0007720659553831231, "loss": 1.3436, "theoretical_loss": 3.4236311497432315, "tokens_seen": 2048000000 }, { "epoch": 0.24, "objective/train/advantage_avg": 0.006551867816597223, "objective/train/docs_used": 1159006, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.7512247562408447, "objective/train/original_loss": 2.7512245178222656, "objective/train/theoretical_loss": 3.423594784646947, "objective/train/tokens_used": 407121376, "objective/train/value_avg": -0.022857666015625, "objective/train/value_loss": 0.005319257266819477, "objective/train/value_max": -0.0007123947143554688, "objective/train/value_min": -0.99365234375, "objective/train/value_reward_corr": 0.46645264453060903, "objective/train/value_std": 0.03826904296875, "objective/train/weight_avg": 1.0006812810897827, "objective/train/weighted_lm_loss": 2.752868413925171, "objective/train/weights_max": 1.0869683027267456, "objective/train/weights_min": 0.9101842045783997, "theoretical_loss": 3.423594784646947, "tokens_seen": 2048262144 }, { "epoch": 0.24, "learning_rate": 0.000771742644681539, "loss": 1.2636, "theoretical_loss": 3.423558425507469, "tokens_seen": 2048524288 }, { "epoch": 0.24, "learning_rate": 0.0007714193339799548, "loss": 1.3202, "theoretical_loss": 3.423485725091984, "tokens_seen": 2049048576 }, { "epoch": 0.24, "learning_rate": 0.0007710960232783706, "loss": 1.3313, "theoretical_loss": 3.423413048482882, "tokens_seen": 2049572864 }, { "epoch": 0.24, "objective/train/advantage_avg": 0.009631874971091747, "objective/train/docs_used": 1159734, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 1.9894094467163086, "objective/train/original_loss": 1.9894095659255981, "objective/train/theoretical_loss": 3.4233676376852036, "objective/train/tokens_used": 408759776, "objective/train/value_avg": -0.01776123046875, "objective/train/value_loss": 0.0005501412088051438, "objective/train/value_max": -0.0002758502960205078, "objective/train/value_min": -0.3173828125, "objective/train/value_reward_corr": 0.3880360018370114, "objective/train/value_std": 0.01490020751953125, "objective/train/weight_avg": 1.0009658336639404, "objective/train/weighted_lm_loss": 1.9923787117004395, "objective/train/weights_max": 1.0155773162841797, "objective/train/weights_min": 0.9532714486122131, "theoretical_loss": 3.4233676376852036, "tokens_seen": 2049900544 }, { "epoch": 0.24, "learning_rate": 0.0007707727125767863, "loss": 1.3329, "theoretical_loss": 3.423340395666281, "tokens_seen": 2050097152 }, { "epoch": 0.24, "learning_rate": 0.000770449401875202, "loss": 1.2682, "theoretical_loss": 3.42326776662831, "tokens_seen": 2050621440 }, { "epoch": 0.24, "learning_rate": 0.0007701260911736179, "loss": 1.3264, "theoretical_loss": 3.4231951613551095, "tokens_seen": 2051145728 }, { "epoch": 0.24, "objective/train/advantage_avg": 0.014511306770145893, "objective/train/docs_used": 1160859, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.7624094486236572, "objective/train/original_loss": 2.762409210205078, "objective/train/theoretical_loss": 3.423140722987509, "objective/train/tokens_used": 410398176, "objective/train/value_avg": -0.033294677734375, "objective/train/value_loss": 0.002795952605083585, "objective/train/value_max": -0.0005335807800292969, "objective/train/value_min": -0.56787109375, "objective/train/value_reward_corr": 0.7327652184987758, "objective/train/value_std": 0.06829833984375, "objective/train/weight_avg": 1.001465082168579, "objective/train/weighted_lm_loss": 2.767319917678833, "objective/train/weights_max": 1.0492616891860962, "objective/train/weights_min": 0.9489697813987732, "theoretical_loss": 3.423140722987509, "tokens_seen": 2051538944 }, { "epoch": 0.24, "learning_rate": 0.0007698027804720337, "loss": 1.3551, "theoretical_loss": 3.423122579832832, "tokens_seen": 2051670016 }, { "epoch": 0.24, "learning_rate": 0.0007694794697704494, "loss": 1.3014, "theoretical_loss": 3.4230500220476427, "tokens_seen": 2052194304 }, { "epoch": 0.24, "learning_rate": 0.0007691561590688652, "loss": 1.3171, "theoretical_loss": 3.422977487985716, "tokens_seen": 2052718592 }, { "epoch": 0.24, "objective/train/advantage_avg": 0.0069312225095927715, "objective/train/docs_used": 1161710, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.3064301013946533, "objective/train/original_loss": 2.306429624557495, "objective/train/theoretical_loss": 3.422914040131161, "objective/train/tokens_used": 412036576, "objective/train/value_avg": -0.0137481689453125, "objective/train/value_loss": 0.0006592646823264658, "objective/train/value_max": -0.0002892017364501953, "objective/train/value_min": -0.1453857421875, "objective/train/value_reward_corr": 0.2860112049872721, "objective/train/value_std": 0.012298583984375, "objective/train/weight_avg": 1.0006964206695557, "objective/train/weighted_lm_loss": 2.308492660522461, "objective/train/weights_max": 1.0136374235153198, "objective/train/weights_min": 0.9656095504760742, "theoretical_loss": 3.422914040131161, "tokens_seen": 2053177344 }, { "epoch": 0.24, "learning_rate": 0.0007688328483672809, "loss": 1.2673, "theoretical_loss": 3.42290497763324, "tokens_seen": 2053242880 }, { "epoch": 0.24, "learning_rate": 0.0007685095376656968, "loss": 1.3445, "theoretical_loss": 3.422832490976414, "tokens_seen": 2053767168 }, { "epoch": 0.24, "learning_rate": 0.0007681862269641126, "loss": 1.3435, "theoretical_loss": 3.4227600280014467, "tokens_seen": 2054291456 }, { "epoch": 0.24, "objective/train/advantage_avg": 0.008836128748953342, "objective/train/docs_used": 1163069, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.4695441722869873, "objective/train/original_loss": 2.4695441722869873, "objective/train/theoretical_loss": 3.4226875886945614, "objective/train/tokens_used": 413674976, "objective/train/value_avg": -0.0200653076171875, "objective/train/value_loss": 0.0030409207101911306, "objective/train/value_max": -0.0006165504455566406, "objective/train/value_min": -0.955078125, "objective/train/value_reward_corr": 0.3555254600960528, "objective/train/value_std": 0.0280303955078125, "objective/train/weight_avg": 1.0008985996246338, "objective/train/weighted_lm_loss": 2.472344160079956, "objective/train/weights_max": 1.0623854398727417, "objective/train/weights_min": 0.9187532663345337, "theoretical_loss": 3.4226875886945614, "tokens_seen": 2054815744 }, { "epoch": 0.24, "learning_rate": 0.0007678629162625283, "loss": 1.3356, "theoretical_loss": 3.4226875886945614, "tokens_seen": 2054815744 }, { "epoch": 0.24, "learning_rate": 0.0007675396055609441, "loss": 1.3431, "theoretical_loss": 3.422615173041992, "tokens_seen": 2055340032 }, { "epoch": 0.24, "learning_rate": 0.0007672162948593598, "loss": 1.3134, "theoretical_loss": 3.422542781029982, "tokens_seen": 2055864320 }, { "epoch": 0.24, "learning_rate": 0.0007668929841577756, "loss": 1.3184, "theoretical_loss": 3.422470412644789, "tokens_seen": 2056388608 }, { "epoch": 0.24, "objective/train/advantage_avg": 0.01286634337157011, "objective/train/docs_used": 1163822, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.5987019538879395, "objective/train/original_loss": 2.598701238632202, "objective/train/theoretical_loss": 3.4224613682572165, "objective/train/tokens_used": 415313376, "objective/train/value_avg": -0.0248260498046875, "objective/train/value_loss": 0.002805235330015421, "objective/train/value_max": -0.0010814666748046875, "objective/train/value_min": -0.64501953125, "objective/train/value_reward_corr": 0.25381317636007666, "objective/train/value_std": 0.025909423828125, "objective/train/weight_avg": 1.0013004541397095, "objective/train/weighted_lm_loss": 2.603774070739746, "objective/train/weights_max": 1.0239759683609009, "objective/train/weights_min": 0.9141361117362976, "theoretical_loss": 3.4224613682572165, "tokens_seen": 2056454144 }, { "epoch": 0.24, "learning_rate": 0.0007665696734561915, "loss": 1.331, "theoretical_loss": 3.422398067872681, "tokens_seen": 2056912896 }, { "epoch": 0.24, "learning_rate": 0.0007662463627546072, "loss": 1.3445, "theoretical_loss": 3.4223257466999364, "tokens_seen": 2057437184 }, { "epoch": 0.24, "learning_rate": 0.000765923052053023, "loss": 1.2854, "theoretical_loss": 3.422253449112848, "tokens_seen": 2057961472 }, { "epoch": 0.24, "objective/train/advantage_avg": 0.009886897169053555, "objective/train/docs_used": 1164494, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.5435705184936523, "objective/train/original_loss": 2.543569803237915, "objective/train/theoretical_loss": 3.4222353783997286, "objective/train/tokens_used": 416951776, "objective/train/value_avg": -0.015106201171875, "objective/train/value_loss": 0.00037147142575122416, "objective/train/value_max": -0.0005860328674316406, "objective/train/value_min": -0.325439453125, "objective/train/value_reward_corr": 0.08938171601827499, "objective/train/value_std": 0.0131072998046875, "objective/train/weight_avg": 1.000990629196167, "objective/train/weighted_lm_loss": 2.5470988750457764, "objective/train/weights_max": 1.0277966260910034, "objective/train/weights_min": 0.9913603663444519, "theoretical_loss": 3.4222353783997286, "tokens_seen": 2058092544 }, { "epoch": 0.24, "learning_rate": 0.0007655997413514387, "loss": 1.3192, "theoretical_loss": 3.422181175097716, "tokens_seen": 2058485760 }, { "epoch": 0.24, "learning_rate": 0.0007652764306498545, "loss": 1.3409, "theoretical_loss": 3.422108924640856, "tokens_seen": 2059010048 }, { "epoch": 0.24, "learning_rate": 0.0007649531199482704, "loss": 1.3473, "theoretical_loss": 3.4220366977285925, "tokens_seen": 2059534336 }, { "epoch": 0.24, "objective/train/advantage_avg": 0.011258062906563282, "objective/train/docs_used": 1165816, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.643962860107422, "objective/train/original_loss": 2.64396333694458, "objective/train/theoretical_loss": 3.4220096187037963, "objective/train/tokens_used": 418590176, "objective/train/value_avg": -0.0167999267578125, "objective/train/value_loss": 0.0018690339056774974, "objective/train/value_max": -0.0009813308715820312, "objective/train/value_min": -0.93603515625, "objective/train/value_reward_corr": 0.2579294089331166, "objective/train/value_std": 0.023529052734375, "objective/train/weight_avg": 1.001134991645813, "objective/train/weighted_lm_loss": 2.647549629211426, "objective/train/weights_max": 1.0492255687713623, "objective/train/weights_min": 0.9102020859718323, "theoretical_loss": 3.4220096187037963, "tokens_seen": 2059730944 }, { "epoch": 0.24, "learning_rate": 0.0007646298092466861, "loss": 1.3286, "theoretical_loss": 3.4219644943472627, "tokens_seen": 2060058624 }, { "epoch": 0.24, "learning_rate": 0.0007643064985451018, "loss": 1.3325, "theoretical_loss": 3.421892314483214, "tokens_seen": 2060582912 }, { "epoch": 0.24, "learning_rate": 0.0007639831878435176, "loss": 1.3278, "theoretical_loss": 3.421820158122806, "tokens_seen": 2061107200 }, { "epoch": 0.24, "objective/train/advantage_avg": 0.009252803400158882, "objective/train/docs_used": 1166525, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.852177619934082, "objective/train/original_loss": 2.852177858352661, "objective/train/theoretical_loss": 3.4217840887522075, "objective/train/tokens_used": 420228576, "objective/train/value_avg": -0.0171966552734375, "objective/train/value_loss": 0.001181851141154766, "objective/train/value_max": -0.0008525848388671875, "objective/train/value_min": -0.8779296875, "objective/train/value_reward_corr": 0.378628503723748, "objective/train/value_std": 0.0204010009765625, "objective/train/weight_avg": 1.0009311437606812, "objective/train/weighted_lm_loss": 2.8556008338928223, "objective/train/weights_max": 1.0547549724578857, "objective/train/weights_min": 0.9195836782455444, "theoretical_loss": 3.4217840887522075, "tokens_seen": 2061369344 }, { "epoch": 0.24, "learning_rate": 0.0007636598771419334, "loss": 1.3094, "theoretical_loss": 3.42174802525241, "tokens_seen": 2061631488 }, { "epoch": 0.24, "learning_rate": 0.0007633365664403493, "loss": 1.3035, "theoretical_loss": 3.4216759158584074, "tokens_seen": 2062155776 }, { "epoch": 0.24, "learning_rate": 0.000763013255738765, "loss": 1.3354, "theoretical_loss": 3.4216038299271925, "tokens_seen": 2062680064 }, { "epoch": 0.24, "objective/train/advantage_avg": 0.010479113087058067, "objective/train/docs_used": 1167825, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.6180365085601807, "objective/train/original_loss": 2.6180365085601807, "objective/train/theoretical_loss": 3.4215587881288387, "objective/train/tokens_used": 421866976, "objective/train/value_avg": -0.01334381103515625, "objective/train/value_loss": 0.0002479450486134738, "objective/train/value_max": -0.0007066726684570312, "objective/train/value_min": -0.281005859375, "objective/train/value_reward_corr": 0.15055784933948835, "objective/train/value_std": 0.00994873046875, "objective/train/weight_avg": 1.0010491609573364, "objective/train/weighted_lm_loss": 2.6213347911834717, "objective/train/weights_max": 1.0276976823806763, "objective/train/weights_min": 0.9855284690856934, "theoretical_loss": 3.4215587881288387, "tokens_seen": 2063007744 }, { "epoch": 0.25, "learning_rate": 0.0007626899450371807, "loss": 1.2838, "theoretical_loss": 3.4215317674451704, "tokens_seen": 2063204352 }, { "epoch": 0.25, "learning_rate": 0.0007623666343355965, "loss": 1.3066, "theoretical_loss": 3.4214597283987565, "tokens_seen": 2063728640 }, { "epoch": 0.25, "learning_rate": 0.0007620433236340123, "loss": 1.3029, "theoretical_loss": 3.4213877127743793, "tokens_seen": 2064252928 }, { "epoch": 0.25, "objective/train/advantage_avg": 0.00807128008455038, "objective/train/docs_used": 1168450, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.866901159286499, "objective/train/original_loss": 2.866901159286499, "objective/train/theoretical_loss": 3.4213337164186486, "objective/train/tokens_used": 423505376, "objective/train/value_avg": -0.02532958984375, "objective/train/value_loss": 0.005335991270840168, "objective/train/value_max": -0.0007791519165039062, "objective/train/value_min": -0.6318359375, "objective/train/value_reward_corr": 0.28453995074103683, "objective/train/value_std": 0.027252197265625, "objective/train/weight_avg": 1.0008333921432495, "objective/train/weighted_lm_loss": 2.8698651790618896, "objective/train/weights_max": 1.0569379329681396, "objective/train/weights_min": 0.9269784688949585, "theoretical_loss": 3.4213337164186486, "tokens_seen": 2064646144 }, { "epoch": 0.25, "learning_rate": 0.0007617200129324282, "loss": 1.3121, "theoretical_loss": 3.4213157205584768, "tokens_seen": 2064777216 }, { "epoch": 0.25, "learning_rate": 0.0007613967022308439, "loss": 1.2923, "theoretical_loss": 3.4212437517374994, "tokens_seen": 2065301504 }, { "epoch": 0.25, "learning_rate": 0.0007610733915292596, "loss": 1.3301, "theoretical_loss": 3.421171806297909, "tokens_seen": 2065825792 }, { "epoch": 0.25, "objective/train/advantage_avg": 0.006382706109434366, "objective/train/docs_used": 1169491, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.7029707431793213, "objective/train/original_loss": 2.7029712200164795, "objective/train/theoretical_loss": 3.4211088732076766, "objective/train/tokens_used": 425143776, "objective/train/value_avg": -0.0161895751953125, "objective/train/value_loss": 0.0022295741364359856, "objective/train/value_max": -0.001094818115234375, "objective/train/value_min": -0.37060546875, "objective/train/value_reward_corr": 0.36834603998268167, "objective/train/value_std": 0.0177459716796875, "objective/train/weight_avg": 1.0006492137908936, "objective/train/weighted_lm_loss": 2.705094337463379, "objective/train/weights_max": 1.031723976135254, "objective/train/weights_min": 0.9258981347084045, "theoretical_loss": 3.4211088732076766, "tokens_seen": 2066284544 }, { "epoch": 0.25, "learning_rate": 0.0007607500808276754, "loss": 1.3207, "theoretical_loss": 3.4210998842261784, "tokens_seen": 2066350080 }, { "epoch": 0.25, "learning_rate": 0.0007604267701260912, "loss": 1.2856, "theoretical_loss": 3.4210279855087906, "tokens_seen": 2066874368 }, { "epoch": 0.25, "learning_rate": 0.0007601034594245069, "loss": 1.3233, "theoretical_loss": 3.4209561101322414, "tokens_seen": 2067398656 }, { "epoch": 0.25, "objective/train/advantage_avg": 0.002770314458757639, "objective/train/docs_used": 1170265, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.897768259048462, "objective/train/original_loss": 2.897768497467041, "objective/train/theoretical_loss": 3.4208842580830376, "objective/train/tokens_used": 426782176, "objective/train/value_avg": -0.020263671875, "objective/train/value_loss": 0.0018307839054614305, "objective/train/value_max": -0.0007238388061523438, "objective/train/value_min": -0.625, "objective/train/value_reward_corr": 0.5675402726551144, "objective/train/value_std": 0.0325927734375, "objective/train/weight_avg": 1.0002861022949219, "objective/train/weighted_lm_loss": 2.8997225761413574, "objective/train/weights_max": 1.061860203742981, "objective/train/weights_min": 0.9710991978645325, "theoretical_loss": 3.4208842580830376, "tokens_seen": 2067922944 }, { "epoch": 0.25, "learning_rate": 0.0007597801487229228, "loss": 1.3349, "theoretical_loss": 3.4208842580830376, "tokens_seen": 2067922944 }, { "epoch": 0.25, "learning_rate": 0.0007594568380213385, "loss": 1.33, "theoretical_loss": 3.4208124293476962, "tokens_seen": 2068447232 }, { "epoch": 0.25, "learning_rate": 0.0007591335273197543, "loss": 1.3429, "theoretical_loss": 3.4207406239127467, "tokens_seen": 2068971520 }, { "epoch": 0.25, "learning_rate": 0.0007588102166181701, "loss": 1.3307, "theoretical_loss": 3.420668841764728, "tokens_seen": 2069495808 }, { "epoch": 0.25, "objective/train/advantage_avg": 0.0054245442152023315, "objective/train/docs_used": 1170942, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 3.0406506061553955, "objective/train/original_loss": 3.0406506061553955, "objective/train/theoretical_loss": 3.4206598706329183, "objective/train/tokens_used": 428420576, "objective/train/value_avg": -0.01873779296875, "objective/train/value_loss": 0.0050081429071724415, "objective/train/value_max": -0.0006437301635742188, "objective/train/value_min": -0.71435546875, "objective/train/value_reward_corr": 0.49059496412852105, "objective/train/value_std": 0.022979736328125, "objective/train/weight_avg": 1.000567078590393, "objective/train/weighted_lm_loss": 3.042804002761841, "objective/train/weights_max": 1.0291121006011963, "objective/train/weights_min": 0.9256455898284912, "theoretical_loss": 3.4206598706329183, "tokens_seen": 2069561344 }, { "epoch": 0.25, "learning_rate": 0.0007584869059165858, "loss": 1.2906, "theoretical_loss": 3.4205970828901933, "tokens_seen": 2070020096 }, { "epoch": 0.25, "learning_rate": 0.0007581635952150017, "loss": 1.3022, "theoretical_loss": 3.420525347275703, "tokens_seen": 2070544384 }, { "epoch": 0.25, "learning_rate": 0.0007578402845134174, "loss": 1.3225, "theoretical_loss": 3.420453634907832, "tokens_seen": 2071068672 }, { "epoch": 0.25, "objective/train/advantage_avg": 0.006703423336148262, "objective/train/docs_used": 1172086, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.8704283237457275, "objective/train/original_loss": 2.8704280853271484, "objective/train/theoretical_loss": 3.4204357104465757, "objective/train/tokens_used": 430058976, "objective/train/value_avg": -0.0247802734375, "objective/train/value_loss": 0.006496168207377195, "objective/train/value_max": -0.000415802001953125, "objective/train/value_min": -0.45166015625, "objective/train/value_reward_corr": 0.18903568633157097, "objective/train/value_std": 0.044769287109375, "objective/train/weight_avg": 1.0007025003433228, "objective/train/weighted_lm_loss": 2.8737404346466064, "objective/train/weights_max": 1.043297290802002, "objective/train/weights_min": 0.9110228419303894, "theoretical_loss": 3.4204357104465757, "tokens_seen": 2071199744 }, { "epoch": 0.25, "learning_rate": 0.0007575169738118331, "loss": 1.3652, "theoretical_loss": 3.4203819457731646, "tokens_seen": 2071592960 }, { "epoch": 0.25, "learning_rate": 0.000757193663110249, "loss": 1.3374, "theoretical_loss": 3.4203102798582963, "tokens_seen": 2072117248 }, { "epoch": 0.25, "learning_rate": 0.0007568703524086647, "loss": 1.3242, "theoretical_loss": 3.4202386371498346, "tokens_seen": 2072641536 }, { "epoch": 0.25, "objective/train/advantage_avg": 0.0073866103775799274, "objective/train/docs_used": 1172787, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.2388079166412354, "objective/train/original_loss": 2.2388081550598145, "objective/train/theoretical_loss": 3.420211777114331, "objective/train/tokens_used": 431697376, "objective/train/value_avg": -0.016265869140625, "objective/train/value_loss": 0.0017008581198751926, "objective/train/value_max": -0.0006046295166015625, "objective/train/value_min": -0.72021484375, "objective/train/value_reward_corr": 0.5181896783654818, "objective/train/value_std": 0.023712158203125, "objective/train/weight_avg": 1.0007470846176147, "objective/train/weighted_lm_loss": 2.241532325744629, "objective/train/weights_max": 1.0739407539367676, "objective/train/weights_min": 0.9226791262626648, "theoretical_loss": 3.420211777114331, "tokens_seen": 2072838144 }, { "epoch": 0.25, "learning_rate": 0.0007565470417070806, "loss": 1.2779, "theoretical_loss": 3.4201670176343972, "tokens_seen": 2073165824 }, { "epoch": 0.25, "learning_rate": 0.0007562237310054964, "loss": 1.3334, "theoretical_loss": 3.4200954212986137, "tokens_seen": 2073690112 }, { "epoch": 0.25, "learning_rate": 0.000755900420303912, "loss": 1.3317, "theoretical_loss": 3.4200238481291243, "tokens_seen": 2074214400 }, { "epoch": 0.25, "objective/train/advantage_avg": 0.006624219473451376, "objective/train/docs_used": 1174027, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.315950870513916, "objective/train/original_loss": 2.315950870513916, "objective/train/theoretical_loss": 3.419988070227568, "objective/train/tokens_used": 433335776, "objective/train/value_avg": -0.0204010009765625, "objective/train/value_loss": 0.0018967584474012256, "objective/train/value_max": -0.0005884170532226562, "objective/train/value_min": -0.351806640625, "objective/train/value_reward_corr": 0.45438941047059955, "objective/train/value_std": 0.03277587890625, "objective/train/weight_avg": 1.0006718635559082, "objective/train/weighted_lm_loss": 2.319225788116455, "objective/train/weights_max": 1.030362844467163, "objective/train/weights_min": 0.9274001717567444, "theoretical_loss": 3.419988070227568, "tokens_seen": 2074476544 }, { "epoch": 0.25, "learning_rate": 0.0007555771096023279, "loss": 1.3159, "theoretical_loss": 3.4199522981125803, "tokens_seen": 2074738688 }, { "epoch": 0.25, "learning_rate": 0.0007552537989007436, "loss": 1.3344, "theoretical_loss": 3.4198807712356434, "tokens_seen": 2075262976 }, { "epoch": 0.25, "learning_rate": 0.0007549304881991594, "loss": 1.3543, "theoretical_loss": 3.419809267484988, "tokens_seen": 2075787264 }, { "epoch": 0.25, "objective/train/advantage_avg": 0.014058118686079979, "objective/train/docs_used": 1174812, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.813488245010376, "objective/train/original_loss": 2.813488483428955, "objective/train/theoretical_loss": 3.419764589378726, "objective/train/tokens_used": 434974176, "objective/train/value_avg": -0.021942138671875, "objective/train/value_loss": 0.0022409274242818356, "objective/train/value_max": -0.0010728836059570312, "objective/train/value_min": -0.375732421875, "objective/train/value_reward_corr": 0.3431842752775707, "objective/train/value_std": 0.0277862548828125, "objective/train/weight_avg": 1.0014169216156006, "objective/train/weighted_lm_loss": 2.8178515434265137, "objective/train/weights_max": 1.0221366882324219, "objective/train/weights_min": 0.9208689332008362, "theoretical_loss": 3.419764589378726, "tokens_seen": 2076114944 }, { "epoch": 0.25, "learning_rate": 0.0007546071774975753, "loss": 1.3301, "theoretical_loss": 3.4197377868472985, "tokens_seen": 2076311552 }, { "epoch": 0.25, "learning_rate": 0.0007542838667959909, "loss": 1.3398, "theoretical_loss": 3.41966632930927, "tokens_seen": 2076835840 }, { "epoch": 0.25, "learning_rate": 0.0007539605560944068, "loss": 1.3087, "theoretical_loss": 3.4195948948576094, "tokens_seen": 2077360128 }, { "epoch": 0.25, "objective/train/advantage_avg": -0.012110143899917603, "objective/train/docs_used": 1175928, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.4686617851257324, "objective/train/original_loss": 2.4686620235443115, "objective/train/theoretical_loss": 3.419541334161302, "objective/train/tokens_used": 436612576, "objective/train/value_avg": -0.07086181640625, "objective/train/value_loss": 0.010395020246505737, "objective/train/value_max": -0.0009112358093261719, "objective/train/value_min": -0.9921875, "objective/train/value_reward_corr": 0.9228940746968597, "objective/train/value_std": 0.18994140625, "objective/train/weight_avg": 0.9988402128219604, "objective/train/weighted_lm_loss": 2.4626519680023193, "objective/train/weights_max": 1.0727002620697021, "objective/train/weights_min": 0.9111486673355103, "theoretical_loss": 3.419541334161302, "tokens_seen": 2077753344 }, { "epoch": 0.25, "learning_rate": 0.0007536372453928225, "loss": 1.3088, "theoretical_loss": 3.4195234834790336, "tokens_seen": 2077884416 }, { "epoch": 0.25, "learning_rate": 0.0007533139346912383, "loss": 1.3671, "theoretical_loss": 3.4194520951602723, "tokens_seen": 2078408704 }, { "epoch": 0.25, "learning_rate": 0.0007529906239896541, "loss": 1.3553, "theoretical_loss": 3.4193807298880636, "tokens_seen": 2078932992 }, { "epoch": 0.25, "objective/train/advantage_avg": 0.009267359972000122, "objective/train/docs_used": 1176572, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.738114595413208, "objective/train/original_loss": 2.738114833831787, "objective/train/theoretical_loss": 3.419318304169841, "objective/train/tokens_used": 438250976, "objective/train/value_avg": -0.027496337890625, "objective/train/value_loss": 0.007033150643110275, "objective/train/value_max": -0.0005297660827636719, "objective/train/value_min": -0.9638671875, "objective/train/value_reward_corr": 0.5871554166842385, "objective/train/value_std": 0.0660400390625, "objective/train/weight_avg": 1.000961422920227, "objective/train/weighted_lm_loss": 2.7412147521972656, "objective/train/weights_max": 1.0878756046295166, "objective/train/weights_min": 0.9084756374359131, "theoretical_loss": 3.419318304169841, "tokens_seen": 2079391744 }, { "epoch": 0.26, "learning_rate": 0.0007526673132880698, "loss": 1.3233, "theoretical_loss": 3.419309387649159, "tokens_seen": 2079457280 }, { "epoch": 0.26, "learning_rate": 0.0007523440025864856, "loss": 1.295, "theoretical_loss": 3.41923806843032, "tokens_seen": 2079981568 }, { "epoch": 0.26, "learning_rate": 0.0007520206918849014, "loss": 1.3559, "theoretical_loss": 3.4191667722183183, "tokens_seen": 2080505856 }, { "epoch": 0.26, "objective/train/advantage_avg": 0.009849660098552704, "objective/train/docs_used": 1177438, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.574160099029541, "objective/train/original_loss": 2.574159622192383, "objective/train/theoretical_loss": 3.4190954989999374, "objective/train/tokens_used": 439889376, "objective/train/value_avg": -0.01280975341796875, "objective/train/value_loss": 0.0005892095505259931, "objective/train/value_max": -0.00075531005859375, "objective/train/value_min": -0.21240234375, "objective/train/value_reward_corr": 0.3619328932561465, "objective/train/value_std": 0.01001739501953125, "objective/train/weight_avg": 1.0009878873825073, "objective/train/weighted_lm_loss": 2.5771944522857666, "objective/train/weights_max": 1.012326717376709, "objective/train/weights_min": 0.9508951902389526, "theoretical_loss": 3.4190954989999374, "tokens_seen": 2081030144 }, { "epoch": 0.26, "learning_rate": 0.0007516973811833172, "loss": 1.3564, "theoretical_loss": 3.4190954989999374, "tokens_seen": 2081030144 }, { "epoch": 0.26, "learning_rate": 0.000751374070481733, "loss": 1.3296, "theoretical_loss": 3.4190242487619718, "tokens_seen": 2081554432 }, { "epoch": 0.26, "learning_rate": 0.0007510507597801487, "loss": 1.3195, "theoretical_loss": 3.4189530214912267, "tokens_seen": 2082078720 }, { "epoch": 0.26, "learning_rate": 0.0007507274490785645, "loss": 1.3783, "theoretical_loss": 3.4188818171745172, "tokens_seen": 2082603008 }, { "epoch": 0.26, "objective/train/advantage_avg": 0.008791422471404076, "objective/train/docs_used": 1177725, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 3.0161092281341553, "objective/train/original_loss": 3.0161094665527344, "objective/train/theoretical_loss": 3.418872918248228, "objective/train/tokens_used": 441527776, "objective/train/value_avg": -0.0270233154296875, "objective/train/value_loss": 0.0029984323773533106, "objective/train/value_max": -0.0012254714965820312, "objective/train/value_min": -0.49658203125, "objective/train/value_reward_corr": 0.5628666482150742, "objective/train/value_std": 0.047149658203125, "objective/train/weight_avg": 1.0008939504623413, "objective/train/weighted_lm_loss": 3.0192759037017822, "objective/train/weights_max": 1.0307378768920898, "objective/train/weights_min": 0.9118233919143677, "theoretical_loss": 3.418872918248228, "tokens_seen": 2082668544 }, { "epoch": 0.26, "learning_rate": 0.0007504041383769803, "loss": 1.403, "theoretical_loss": 3.4188106357986716, "tokens_seen": 2083127296 }, { "epoch": 0.26, "learning_rate": 0.0007500808276753961, "loss": 1.3765, "theoretical_loss": 3.418739477350527, "tokens_seen": 2083651584 }, { "epoch": 0.26, "learning_rate": 0.0007497575169738119, "loss": 1.3602, "theoretical_loss": 3.4186683418169324, "tokens_seen": 2084175872 }, { "epoch": 0.26, "objective/train/advantage_avg": 0.006143460050225258, "objective/train/docs_used": 1178778, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.847379446029663, "objective/train/original_loss": 2.847378969192505, "objective/train/theoretical_loss": 3.4186505615123917, "objective/train/tokens_used": 443166176, "objective/train/value_avg": -0.026092529296875, "objective/train/value_loss": 0.0022338523995131254, "objective/train/value_max": -0.0009002685546875, "objective/train/value_min": -0.3017578125, "objective/train/value_reward_corr": 0.6862976385370223, "objective/train/value_std": 0.033355712890625, "objective/train/weight_avg": 1.000625491142273, "objective/train/weighted_lm_loss": 2.8490519523620605, "objective/train/weights_max": 1.0205066204071045, "objective/train/weights_min": 0.9624524116516113, "theoretical_loss": 3.4186505615123917, "tokens_seen": 2084306944 }, { "epoch": 0.26, "learning_rate": 0.0007494342062722276, "loss": 1.3799, "theoretical_loss": 3.4185972291847464, "tokens_seen": 2084700160 }, { "epoch": 0.26, "learning_rate": 0.0007491108955706433, "loss": 1.3115, "theoretical_loss": 3.41852613944084, "tokens_seen": 2085224448 }, { "epoch": 0.26, "learning_rate": 0.0007487875848690592, "loss": 1.3654, "theoretical_loss": 3.4184550725720944, "tokens_seen": 2085748736 }, { "epoch": 0.26, "objective/train/advantage_avg": -0.0026194038800895214, "objective/train/docs_used": 1179467, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.979262113571167, "objective/train/original_loss": 2.979261636734009, "objective/train/theoretical_loss": 3.4184284283911417, "objective/train/tokens_used": 444804576, "objective/train/value_avg": -0.036773681640625, "objective/train/value_loss": 0.013158599846065044, "objective/train/value_max": -0.0012998580932617188, "objective/train/value_min": -0.96826171875, "objective/train/value_reward_corr": 0.5262026857720488, "objective/train/value_std": 0.0548095703125, "objective/train/weight_avg": 0.9998025894165039, "objective/train/weighted_lm_loss": 2.9798834323883057, "objective/train/weights_max": 1.0754274129867554, "objective/train/weights_min": 0.9088283777236938, "theoretical_loss": 3.4184284283911417, "tokens_seen": 2085945344 }, { "epoch": 0.26, "learning_rate": 0.000748464274167475, "loss": 1.3574, "theoretical_loss": 3.4183840285654012, "tokens_seen": 2086273024 }, { "epoch": 0.26, "learning_rate": 0.0007481409634658907, "loss": 1.3356, "theoretical_loss": 3.418313007407664, "tokens_seen": 2086797312 }, { "epoch": 0.26, "learning_rate": 0.0007478176527643065, "loss": 1.3543, "theoretical_loss": 3.4182420090857955, "tokens_seen": 2087321600 }, { "epoch": 0.26, "objective/train/advantage_avg": -0.008043970912694931, "objective/train/docs_used": 1180869, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.4768407344818115, "objective/train/original_loss": 2.4768407344818115, "objective/train/theoretical_loss": 3.418206518484226, "objective/train/tokens_used": 446442976, "objective/train/value_avg": -0.019989013671875, "objective/train/value_loss": 0.005291982553899288, "objective/train/value_max": -0.0011205673217773438, "objective/train/value_min": -0.90673828125, "objective/train/value_reward_corr": 0.35653134433237277, "objective/train/value_std": 0.0266876220703125, "objective/train/weight_avg": 0.999221682548523, "objective/train/weighted_lm_loss": 2.478149890899658, "objective/train/weights_max": 1.0431759357452393, "objective/train/weights_min": 0.9063522219657898, "theoretical_loss": 3.418206518484226, "tokens_seen": 2087583744 }, { "epoch": 0.26, "learning_rate": 0.0007474943420627222, "loss": 1.3429, "theoretical_loss": 3.4181710335867206, "tokens_seen": 2087845888 }, { "epoch": 0.26, "learning_rate": 0.0007471710313611381, "loss": 1.3158, "theoretical_loss": 3.418100080897374, "tokens_seen": 2088370176 }, { "epoch": 0.26, "learning_rate": 0.0007468477206595539, "loss": 1.3798, "theoretical_loss": 3.418029151004702, "tokens_seen": 2088894464 }, { "epoch": 0.26, "objective/train/advantage_avg": 0.002836530562490225, "objective/train/docs_used": 1181528, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.337650775909424, "objective/train/original_loss": 2.3376505374908447, "objective/train/theoretical_loss": 3.417984831392422, "objective/train/tokens_used": 448081376, "objective/train/value_avg": -0.0269317626953125, "objective/train/value_loss": 0.0012927282368764281, "objective/train/value_max": -0.0011339187622070312, "objective/train/value_min": -0.274658203125, "objective/train/value_reward_corr": 0.7740341951190066, "objective/train/value_std": 0.03802490234375, "objective/train/weight_avg": 1.000290036201477, "objective/train/weighted_lm_loss": 2.3386733531951904, "objective/train/weights_max": 1.019840121269226, "objective/train/weights_min": 0.9500049352645874, "theoretical_loss": 3.417984831392422, "tokens_seen": 2089222144 }, { "epoch": 0.26, "learning_rate": 0.0007465244099579696, "loss": 1.2919, "theoretical_loss": 3.4179582438956606, "tokens_seen": 2089418752 }, { "epoch": 0.26, "learning_rate": 0.0007462010992563854, "loss": 1.368, "theoretical_loss": 3.417887359557218, "tokens_seen": 2089943040 }, { "epoch": 0.26, "learning_rate": 0.0007458777885548011, "loss": 1.3451, "theoretical_loss": 3.417816497976352, "tokens_seen": 2090467328 }, { "epoch": 0.26, "objective/train/advantage_avg": 0.01294355746358633, "objective/train/docs_used": 1182667, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.722816228866577, "objective/train/original_loss": 2.722815752029419, "objective/train/theoretical_loss": 3.4177633667175344, "objective/train/tokens_used": 449719776, "objective/train/value_avg": -0.01554107666015625, "objective/train/value_loss": 0.000394015311030671, "objective/train/value_max": -0.0007700920104980469, "objective/train/value_min": -0.481689453125, "objective/train/value_reward_corr": 0.41053976670011433, "objective/train/value_std": 0.01560211181640625, "objective/train/weight_avg": 1.0012964010238647, "objective/train/weighted_lm_loss": 2.7268333435058594, "objective/train/weights_max": 1.0340906381607056, "objective/train/weights_min": 0.9855301976203918, "theoretical_loss": 3.4177633667175344, "tokens_seen": 2090860544 }, { "epoch": 0.26, "learning_rate": 0.0007455544778532169, "loss": 1.3269, "theoretical_loss": 3.417745659140051, "tokens_seen": 2090991616 }, { "epoch": 0.26, "learning_rate": 0.0007452311671516328, "loss": 1.3444, "theoretical_loss": 3.4176748430353143, "tokens_seen": 2091515904 }, { "epoch": 0.26, "learning_rate": 0.0007449078564500485, "loss": 1.3471, "theoretical_loss": 3.417604049649153, "tokens_seen": 2092040192 }, { "epoch": 0.26, "objective/train/advantage_avg": 0.008439915254712105, "objective/train/docs_used": 1183291, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.22770619392395, "objective/train/original_loss": 2.227705955505371, "objective/train/theoretical_loss": 3.417542124062389, "objective/train/tokens_used": 451358176, "objective/train/value_avg": -0.02777099609375, "objective/train/value_loss": 0.003976553212851286, "objective/train/value_max": -0.0005192756652832031, "objective/train/value_min": -0.875, "objective/train/value_reward_corr": 0.5246948898692599, "objective/train/value_std": 0.040679931640625, "objective/train/weight_avg": 1.0008636713027954, "objective/train/weighted_lm_loss": 2.2305760383605957, "objective/train/weights_max": 1.085466980934143, "objective/train/weights_min": 0.913277804851532, "theoretical_loss": 3.417542124062389, "tokens_seen": 2092498944 }, { "epoch": 0.26, "learning_rate": 0.0007445845457484643, "loss": 1.3442, "theoretical_loss": 3.4175332789685875, "tokens_seen": 2092564480 }, { "epoch": 0.26, "learning_rate": 0.00074426123504688, "loss": 1.3256, "theoretical_loss": 3.4174625309806492, "tokens_seen": 2093088768 }, { "epoch": 0.26, "learning_rate": 0.0007439379243452958, "loss": 1.343, "theoretical_loss": 3.41739180567238, "tokens_seen": 2093613056 }, { "epoch": 0.26, "objective/train/advantage_avg": 0.01509292796254158, "objective/train/docs_used": 1183994, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.613130807876587, "objective/train/original_loss": 2.613130807876587, "objective/train/theoretical_loss": 3.4173211030308335, "objective/train/tokens_used": 452996576, "objective/train/value_avg": -0.02227783203125, "objective/train/value_loss": 0.0011120653944090009, "objective/train/value_max": -0.00051116943359375, "objective/train/value_min": -0.39697265625, "objective/train/value_reward_corr": 0.5120599659925047, "objective/train/value_std": 0.031829833984375, "objective/train/weight_avg": 1.0015147924423218, "objective/train/weighted_lm_loss": 2.618175745010376, "objective/train/weights_max": 1.03385591506958, "objective/train/weights_min": 0.9705950617790222, "theoretical_loss": 3.4173211030308335, "tokens_seen": 2094137344 }, { "epoch": 0.26, "learning_rate": 0.0007436146136437117, "loss": 1.3442, "theoretical_loss": 3.4173211030308335, "tokens_seen": 2094137344 }, { "epoch": 0.26, "learning_rate": 0.0007432913029421274, "loss": 1.3779, "theoretical_loss": 3.4172504230430727, "tokens_seen": 2094661632 }, { "epoch": 0.26, "learning_rate": 0.0007429679922405431, "loss": 1.3437, "theoretical_loss": 3.4171797656961713, "tokens_seen": 2095185920 }, { "epoch": 0.26, "learning_rate": 0.0007426446815389589, "loss": 1.3536, "theoretical_loss": 3.417109130977215, "tokens_seen": 2095710208 }, { "epoch": 0.26, "objective/train/advantage_avg": 0.008397869765758514, "objective/train/docs_used": 1184873, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.5236527919769287, "objective/train/original_loss": 2.523653030395508, "objective/train/theoretical_loss": 3.4171003032277296, "objective/train/tokens_used": 454634976, "objective/train/value_avg": -0.013916015625, "objective/train/value_loss": 0.0013565333792939782, "objective/train/value_max": -0.0006384849548339844, "objective/train/value_min": -0.245849609375, "objective/train/value_reward_corr": 0.4989836434871887, "objective/train/value_std": 0.013519287109375, "objective/train/weight_avg": 1.0008465051651, "objective/train/weighted_lm_loss": 2.5264039039611816, "objective/train/weights_max": 1.0169048309326172, "objective/train/weights_min": 0.9349772334098816, "theoretical_loss": 3.4171003032277296, "tokens_seen": 2095775744 }, { "epoch": 0.27, "learning_rate": 0.0007423213708373747, "loss": 1.287, "theoretical_loss": 3.417038518873298, "tokens_seen": 2096234496 }, { "epoch": 0.27, "learning_rate": 0.0007419980601357906, "loss": 1.3785, "theoretical_loss": 3.416967929371526, "tokens_seen": 2096758784 }, { "epoch": 0.27, "learning_rate": 0.0007416747494342063, "loss": 1.3706, "theoretical_loss": 3.416897362459016, "tokens_seen": 2097283072 }, { "epoch": 0.27, "objective/train/advantage_avg": -0.0003170316922478378, "objective/train/docs_used": 1185597, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.7731101512908936, "objective/train/original_loss": 2.7731099128723145, "objective/train/theoretical_loss": 3.4168797242589526, "objective/train/tokens_used": 456273376, "objective/train/value_avg": -0.0187225341796875, "objective/train/value_loss": 0.004899013787508011, "objective/train/value_max": -0.00043892860412597656, "objective/train/value_min": -0.7021484375, "objective/train/value_reward_corr": 0.4471730176861235, "objective/train/value_std": 0.026336669921875, "objective/train/weight_avg": 0.9999924302101135, "objective/train/weighted_lm_loss": 2.7731525897979736, "objective/train/weights_max": 1.066197156906128, "objective/train/weights_min": 0.930682361125946, "theoretical_loss": 3.4168797242589526, "tokens_seen": 2097414144 }, { "epoch": 0.27, "learning_rate": 0.0007413514387326221, "loss": 1.3436, "theoretical_loss": 3.416826818122896, "tokens_seen": 2097807360 }, { "epoch": 0.27, "learning_rate": 0.0007410281280310378, "loss": 1.3464, "theoretical_loss": 3.416756296350302, "tokens_seen": 2098331648 }, { "epoch": 0.27, "learning_rate": 0.0007407048173294536, "loss": 1.3394, "theoretical_loss": 3.416685797128382, "tokens_seen": 2098855936 }, { "epoch": 0.27, "objective/train/advantage_avg": -0.005536246579140425, "objective/train/docs_used": 1186813, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.9614052772521973, "objective/train/original_loss": 2.9614052772521973, "objective/train/theoretical_loss": 3.4166593657313866, "objective/train/tokens_used": 457911776, "objective/train/value_avg": -0.0184173583984375, "objective/train/value_loss": 0.009146133437752724, "objective/train/value_max": -0.000690460205078125, "objective/train/value_min": -0.5556640625, "objective/train/value_reward_corr": 0.410194874659098, "objective/train/value_std": 0.0180816650390625, "objective/train/weight_avg": 0.9994912147521973, "objective/train/weighted_lm_loss": 2.9601728916168213, "objective/train/weights_max": 1.0241669416427612, "objective/train/weights_min": 0.908617377281189, "theoretical_loss": 3.4166593657313866, "tokens_seen": 2099052544 }, { "epoch": 0.27, "learning_rate": 0.0007403815066278695, "loss": 1.3654, "theoretical_loss": 3.4166153204442957, "tokens_seen": 2099380224 }, { "epoch": 0.27, "learning_rate": 0.0007400581959262852, "loss": 1.2956, "theoretical_loss": 3.4165448662852116, "tokens_seen": 2099904512 }, { "epoch": 0.27, "learning_rate": 0.000739734885224701, "loss": 1.323, "theoretical_loss": 3.4164744346383094, "tokens_seen": 2100428800 }, { "epoch": 0.27, "objective/train/advantage_avg": 0.006794080138206482, "objective/train/docs_used": 1187470, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.2046308517456055, "objective/train/original_loss": 2.2046306133270264, "objective/train/theoretical_loss": 3.416439227252923, "objective/train/tokens_used": 459550176, "objective/train/value_avg": -0.018798828125, "objective/train/value_loss": 0.001158680533990264, "objective/train/value_max": -0.0012063980102539062, "objective/train/value_min": -0.9072265625, "objective/train/value_reward_corr": 0.5804608493971014, "objective/train/value_std": 0.0233306884765625, "objective/train/weight_avg": 1.000685214996338, "objective/train/weighted_lm_loss": 2.206937313079834, "objective/train/weights_max": 1.0368858575820923, "objective/train/weights_min": 0.9186185598373413, "theoretical_loss": 3.416439227252923, "tokens_seen": 2100690944 }, { "epoch": 0.27, "learning_rate": 0.0007394115745231167, "loss": 1.3863, "theoretical_loss": 3.4164040254907793, "tokens_seen": 2100953088 }, { "epoch": 0.27, "learning_rate": 0.0007390882638215325, "loss": 1.3618, "theoretical_loss": 3.4163336388298218, "tokens_seen": 2101477376 }, { "epoch": 0.27, "learning_rate": 0.0007387649531199482, "loss": 1.3109, "theoretical_loss": 3.416263274642648, "tokens_seen": 2102001664 }, { "epoch": 0.27, "objective/train/advantage_avg": 0.003390837926417589, "objective/train/docs_used": 1188148, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.894831895828247, "objective/train/original_loss": 2.894831895828247, "objective/train/theoretical_loss": 3.4162193084324546, "objective/train/tokens_used": 461188576, "objective/train/value_avg": -0.0174407958984375, "objective/train/value_loss": 0.00171066471375525, "objective/train/value_max": -0.0011243820190429688, "objective/train/value_min": -0.439208984375, "objective/train/value_reward_corr": 0.4825816060471836, "objective/train/value_std": 0.01617431640625, "objective/train/weight_avg": 1.00034761428833, "objective/train/weighted_lm_loss": 2.897669553756714, "objective/train/weights_max": 1.0448247194290161, "objective/train/weights_min": 0.949958860874176, "theoretical_loss": 3.4162193084324546, "tokens_seen": 2102329344 }, { "epoch": 0.27, "learning_rate": 0.0007384416424183641, "loss": 1.3425, "theoretical_loss": 3.4161929329164793, "tokens_seen": 2102525952 }, { "epoch": 0.27, "learning_rate": 0.0007381183317167799, "loss": 1.2826, "theoretical_loss": 3.416122613638548, "tokens_seen": 2103050240 }, { "epoch": 0.27, "learning_rate": 0.0007377950210151956, "loss": 1.3195, "theoretical_loss": 3.4160523167960966, "tokens_seen": 2103574528 }, { "epoch": 0.27, "objective/train/advantage_avg": 0.006661507301032543, "objective/train/docs_used": 1189669, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.5795910358428955, "objective/train/original_loss": 2.5795907974243164, "objective/train/theoretical_loss": 3.415999608879873, "objective/train/tokens_used": 462826976, "objective/train/value_avg": -0.017303466796875, "objective/train/value_loss": 0.003121660090982914, "objective/train/value_max": -0.0007791519165039062, "objective/train/value_min": -0.92822265625, "objective/train/value_reward_corr": 0.4750324101362667, "objective/train/value_std": 0.0263671875, "objective/train/weight_avg": 1.0006815195083618, "objective/train/weighted_lm_loss": 2.581773281097412, "objective/train/weights_max": 1.0381126403808594, "objective/train/weights_min": 0.9075012803077698, "theoretical_loss": 3.415999608879873, "tokens_seen": 2103967744 }, { "epoch": 0.27, "learning_rate": 0.0007374717103136114, "loss": 1.3332, "theoretical_loss": 3.4159820423763776, "tokens_seen": 2104098816 }, { "epoch": 0.27, "learning_rate": 0.0007371483996120271, "loss": 1.3038, "theoretical_loss": 3.415911790366654, "tokens_seen": 2104623104 }, { "epoch": 0.27, "learning_rate": 0.000736825088910443, "loss": 1.3047, "theoretical_loss": 3.4158415607542, "tokens_seen": 2105147392 }, { "epoch": 0.27, "objective/train/advantage_avg": 0.005734726320952177, "objective/train/docs_used": 1190193, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.14335560798645, "objective/train/original_loss": 2.143355369567871, "objective/train/theoretical_loss": 3.4157801282060665, "objective/train/tokens_used": 464465376, "objective/train/value_avg": -0.013885498046875, "objective/train/value_loss": 0.001137745683081448, "objective/train/value_max": -0.0004992485046386719, "objective/train/value_min": -0.235107421875, "objective/train/value_reward_corr": 0.3732123182875177, "objective/train/value_std": 0.0125579833984375, "objective/train/weight_avg": 1.0005791187286377, "objective/train/weighted_lm_loss": 2.144608497619629, "objective/train/weights_max": 1.0118926763534546, "objective/train/weights_min": 0.9385230541229248, "theoretical_loss": 3.4157801282060665, "tokens_seen": 2105606144 }, { "epoch": 0.27, "learning_rate": 0.0007365017782088588, "loss": 1.3334, "theoretical_loss": 3.4157713535262997, "tokens_seen": 2105671680 }, { "epoch": 0.27, "learning_rate": 0.0007361784675072744, "loss": 1.2842, "theoretical_loss": 3.4157011686702474, "tokens_seen": 2106195968 }, { "epoch": 0.27, "learning_rate": 0.0007358551568056903, "loss": 1.333, "theoretical_loss": 3.4156310061733475, "tokens_seen": 2106720256 }, { "epoch": 0.27, "objective/train/advantage_avg": 7.999977242434397e-05, "objective/train/docs_used": 1191719, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.547412633895874, "objective/train/original_loss": 2.547412633895874, "objective/train/theoretical_loss": 3.415560866022916, "objective/train/tokens_used": 466103776, "objective/train/value_avg": -0.0262603759765625, "objective/train/value_loss": 0.010723809711635113, "objective/train/value_max": -0.0006642341613769531, "objective/train/value_min": -0.9609375, "objective/train/value_reward_corr": 0.49429236471621446, "objective/train/value_std": 0.05511474609375, "objective/train/weight_avg": 1.0000605583190918, "objective/train/weighted_lm_loss": 2.5472099781036377, "objective/train/weights_max": 1.0949749946594238, "objective/train/weights_min": 0.906908392906189, "theoretical_loss": 3.415560866022916, "tokens_seen": 2107244544 }, { "epoch": 0.27, "learning_rate": 0.000735531846104106, "loss": 1.3118, "theoretical_loss": 3.415560866022916, "tokens_seen": 2107244544 }, { "epoch": 0.27, "learning_rate": 0.0007352085354025219, "loss": 1.3276, "theoretical_loss": 3.4154907482062784, "tokens_seen": 2107768832 }, { "epoch": 0.27, "learning_rate": 0.0007348852247009377, "loss": 1.3485, "theoretical_loss": 3.41542065271077, "tokens_seen": 2108293120 }, { "epoch": 0.27, "learning_rate": 0.0007345619139993533, "loss": 1.3281, "theoretical_loss": 3.4153505795237367, "tokens_seen": 2108817408 }, { "epoch": 0.27, "objective/train/advantage_avg": 0.013734281994402409, "objective/train/docs_used": 1192553, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.8582584857940674, "objective/train/original_loss": 2.8582582473754883, "objective/train/theoretical_loss": 3.4153418219432927, "objective/train/tokens_used": 467742176, "objective/train/value_avg": -0.0249176025390625, "objective/train/value_loss": 0.0033800022210925817, "objective/train/value_max": -0.0004839897155761719, "objective/train/value_min": -0.94775390625, "objective/train/value_reward_corr": 0.3217141671173881, "objective/train/value_std": 0.035980224609375, "objective/train/weight_avg": 1.0013900995254517, "objective/train/weighted_lm_loss": 2.8630802631378174, "objective/train/weights_max": 1.0751769542694092, "objective/train/weights_min": 0.908672571182251, "theoretical_loss": 3.4153418219432927, "tokens_seen": 2108882944 }, { "epoch": 0.27, "learning_rate": 0.0007342386032977692, "loss": 1.3367, "theoretical_loss": 3.415280528632536, "tokens_seen": 2109341696 }, { "epoch": 0.27, "learning_rate": 0.0007339152925961849, "loss": 1.3005, "theoretical_loss": 3.415210500024534, "tokens_seen": 2109865984 }, { "epoch": 0.27, "learning_rate": 0.0007335919818946008, "loss": 1.302, "theoretical_loss": 3.415140493687108, "tokens_seen": 2110390272 }, { "epoch": 0.27, "objective/train/advantage_avg": 0.008514144457876682, "objective/train/docs_used": 1194000, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.874328851699829, "objective/train/original_loss": 2.874328851699829, "objective/train/theoretical_loss": 3.415122995581051, "objective/train/tokens_used": 469380576, "objective/train/value_avg": -0.0202789306640625, "objective/train/value_loss": 0.0026127793826162815, "objective/train/value_max": -0.0011072158813476562, "objective/train/value_min": -0.73193359375, "objective/train/value_reward_corr": 0.39390549227301364, "objective/train/value_std": 0.0207061767578125, "objective/train/weight_avg": 1.0008642673492432, "objective/train/weighted_lm_loss": 2.8772518634796143, "objective/train/weights_max": 1.0270427465438843, "objective/train/weights_min": 0.9195321202278137, "theoretical_loss": 3.415122995581051, "tokens_seen": 2110521344 }, { "epoch": 0.27, "learning_rate": 0.0007332686711930166, "loss": 1.3234, "theoretical_loss": 3.415070509607646, "tokens_seen": 2110914560 }, { "epoch": 0.27, "learning_rate": 0.0007329453604914322, "loss": 1.3192, "theoretical_loss": 3.415000547773545, "tokens_seen": 2111438848 }, { "epoch": 0.27, "learning_rate": 0.0007326220497898481, "loss": 1.3241, "theoretical_loss": 3.414930608172213, "tokens_seen": 2111963136 }, { "epoch": 0.27, "objective/train/advantage_avg": -0.02852867916226387, "objective/train/docs_used": 1194755, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.5739529132843018, "objective/train/original_loss": 2.57395339012146, "objective/train/theoretical_loss": 3.414904386551031, "objective/train/tokens_used": 471018976, "objective/train/value_avg": -0.0247039794921875, "objective/train/value_loss": 0.02029641531407833, "objective/train/value_max": -0.0009889602661132812, "objective/train/value_min": -0.54931640625, "objective/train/value_reward_corr": 0.8353821934816444, "objective/train/value_std": 0.0555419921875, "objective/train/weight_avg": 0.9972466826438904, "objective/train/weighted_lm_loss": 2.5746049880981445, "objective/train/weights_max": 1.019140362739563, "objective/train/weights_min": 0.9285653829574585, "theoretical_loss": 3.414904386551031, "tokens_seen": 2112159744 }, { "epoch": 0.28, "learning_rate": 0.0007322987390882638, "loss": 1.3365, "theoretical_loss": 3.414860690791068, "tokens_seen": 2112487424 }, { "epoch": 0.28, "learning_rate": 0.0007319754283866796, "loss": 1.2738, "theoretical_loss": 3.414790795617539, "tokens_seen": 2113011712 }, { "epoch": 0.28, "learning_rate": 0.0007316521176850955, "loss": 1.329, "theoretical_loss": 3.4147209226390647, "tokens_seen": 2113536000 }, { "epoch": 0.28, "objective/train/advantage_avg": 0.00525592640042305, "objective/train/docs_used": 1195575, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.232544183731079, "objective/train/original_loss": 2.232544183731079, "objective/train/theoretical_loss": 3.41468599446905, "objective/train/tokens_used": 472657376, "objective/train/value_avg": -0.0206756591796875, "objective/train/value_loss": 0.0035556445363909006, "objective/train/value_max": -0.0008692741394042969, "objective/train/value_min": -0.8037109375, "objective/train/value_reward_corr": 0.6200987834178081, "objective/train/value_std": 0.035247802734375, "objective/train/weight_avg": 1.0005429983139038, "objective/train/weighted_lm_loss": 2.234177827835083, "objective/train/weights_max": 1.0432372093200684, "objective/train/weights_min": 0.9191145896911621, "theoretical_loss": 3.41468599446905, "tokens_seen": 2113798144 }, { "epoch": 0.28, "learning_rate": 0.0007313288069835111, "loss": 1.3025, "theoretical_loss": 3.4146510718430934, "tokens_seen": 2114060288 }, { "epoch": 0.28, "learning_rate": 0.000731005496281927, "loss": 1.2891, "theoretical_loss": 3.414581243217085, "tokens_seen": 2114584576 }, { "epoch": 0.28, "learning_rate": 0.0007306821855803427, "loss": 1.3032, "theoretical_loss": 3.4145114367485077, "tokens_seen": 2115108864 }, { "epoch": 0.28, "objective/train/advantage_avg": 0.006636181380599737, "objective/train/docs_used": 1196855, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.597548484802246, "objective/train/original_loss": 2.597548246383667, "objective/train/theoretical_loss": 3.4144678189519038, "objective/train/tokens_used": 474295776, "objective/train/value_avg": -0.0172119140625, "objective/train/value_loss": 0.004016223829239607, "objective/train/value_max": -0.0005617141723632812, "objective/train/value_min": -0.99267578125, "objective/train/value_reward_corr": 0.5445554078265157, "objective/train/value_std": 0.047271728515625, "objective/train/weight_avg": 1.0006834268569946, "objective/train/weighted_lm_loss": 2.5994112491607666, "objective/train/weights_max": 1.0689764022827148, "objective/train/weights_min": 0.908815860748291, "theoretical_loss": 3.4144678189519038, "tokens_seen": 2115436544 }, { "epoch": 0.28, "learning_rate": 0.0007303588748787585, "loss": 1.2989, "theoretical_loss": 3.4144416524248413, "tokens_seen": 2115633152 }, { "epoch": 0.28, "learning_rate": 0.0007300355641771744, "loss": 1.3602, "theoretical_loss": 3.4143718902335767, "tokens_seen": 2116157440 }, { "epoch": 0.28, "learning_rate": 0.00072971225347559, "loss": 1.3087, "theoretical_loss": 3.4143021501622117, "tokens_seen": 2116681728 }, { "epoch": 0.28, "objective/train/advantage_avg": 0.004015612415969372, "objective/train/docs_used": 1197590, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.6032304763793945, "objective/train/original_loss": 2.6032302379608154, "objective/train/theoretical_loss": 3.4142498596173594, "objective/train/tokens_used": 475934176, "objective/train/value_avg": -0.030792236328125, "objective/train/value_loss": 0.010339026339352131, "objective/train/value_max": -0.0007152557373046875, "objective/train/value_min": -0.99658203125, "objective/train/value_reward_corr": 0.6324282171427309, "objective/train/value_std": 0.091552734375, "objective/train/weight_avg": 1.0004525184631348, "objective/train/weighted_lm_loss": 2.6052868366241455, "objective/train/weights_max": 1.0908514261245728, "objective/train/weights_min": 0.9121763110160828, "theoretical_loss": 3.4142498596173594, "tokens_seen": 2117074944 }, { "epoch": 0.28, "learning_rate": 0.0007293889427740058, "loss": 1.3224, "theoretical_loss": 3.414232432198258, "tokens_seen": 2117206016 }, { "epoch": 0.28, "learning_rate": 0.0007290656320724216, "loss": 1.3109, "theoretical_loss": 3.414162736329234, "tokens_seen": 2117730304 }, { "epoch": 0.28, "learning_rate": 0.0007287423213708374, "loss": 1.31, "theoretical_loss": 3.4140930625426718, "tokens_seen": 2118254592 }, { "epoch": 0.28, "objective/train/advantage_avg": 0.008193163201212883, "objective/train/docs_used": 1199039, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.6765806674957275, "objective/train/original_loss": 2.6765806674957275, "objective/train/theoretical_loss": 3.4140321160841536, "objective/train/tokens_used": 477572576, "objective/train/value_avg": -0.0254669189453125, "objective/train/value_loss": 0.004785431548953056, "objective/train/value_max": -0.0008492469787597656, "objective/train/value_min": -0.9619140625, "objective/train/value_reward_corr": 0.5568963745960306, "objective/train/value_std": 0.0537109375, "objective/train/weight_avg": 1.0008429288864136, "objective/train/weighted_lm_loss": 2.67960262298584, "objective/train/weights_max": 1.0648908615112305, "objective/train/weights_min": 0.9084537029266357, "theoretical_loss": 3.4140321160841536, "tokens_seen": 2118713344 }, { "epoch": 0.28, "learning_rate": 0.0007284190106692532, "loss": 1.3368, "theoretical_loss": 3.414023410826111, "tokens_seen": 2118778880 }, { "epoch": 0.28, "learning_rate": 0.0007280956999676689, "loss": 1.3152, "theoretical_loss": 3.4139537811671015, "tokens_seen": 2119303168 }, { "epoch": 0.28, "learning_rate": 0.0007277723892660847, "loss": 1.3336, "theoretical_loss": 3.4138841735532046, "tokens_seen": 2119827456 }, { "epoch": 0.28, "objective/train/advantage_avg": 0.004975001327693462, "objective/train/docs_used": 1199714, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.636861801147461, "objective/train/original_loss": 2.636861801147461, "objective/train/theoretical_loss": 3.4138145879719906, "objective/train/tokens_used": 479210976, "objective/train/value_avg": -0.012115478515625, "objective/train/value_loss": 0.0016116872429847717, "objective/train/value_max": -0.0006022453308105469, "objective/train/value_min": -0.59912109375, "objective/train/value_reward_corr": 0.4932835086748899, "objective/train/value_std": 0.017486572265625, "objective/train/weight_avg": 1.0005054473876953, "objective/train/weighted_lm_loss": 2.6389377117156982, "objective/train/weights_max": 1.0139081478118896, "objective/train/weights_min": 0.9310570359230042, "theoretical_loss": 3.4138145879719906, "tokens_seen": 2120351744 }, { "epoch": 0.28, "learning_rate": 0.0007274490785645005, "loss": 1.3001, "theoretical_loss": 3.4138145879719906, "tokens_seen": 2120351744 }, { "epoch": 0.28, "learning_rate": 0.0007271257678629163, "loss": 1.3053, "theoretical_loss": 3.4137450244110403, "tokens_seen": 2120876032 }, { "epoch": 0.28, "learning_rate": 0.000726802457161332, "loss": 1.3243, "theoretical_loss": 3.4136754828579448, "tokens_seen": 2121400320 }, { "epoch": 0.28, "learning_rate": 0.0007264791464597479, "loss": 1.2826, "theoretical_loss": 3.413605963300305, "tokens_seen": 2121924608 }, { "epoch": 0.28, "objective/train/advantage_avg": 0.007356142159551382, "objective/train/docs_used": 1200493, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.964031934738159, "objective/train/original_loss": 2.96403169631958, "objective/train/theoretical_loss": 3.413597274901538, "objective/train/tokens_used": 480849376, "objective/train/value_avg": -0.0270233154296875, "objective/train/value_loss": 0.006517711561173201, "objective/train/value_max": -0.0005998611450195312, "objective/train/value_min": -0.97802734375, "objective/train/value_reward_corr": 0.568379796640301, "objective/train/value_std": 0.052093505859375, "objective/train/weight_avg": 1.0007675886154175, "objective/train/weighted_lm_loss": 2.966961622238159, "objective/train/weights_max": 1.058939814567566, "objective/train/weights_min": 0.906498908996582, "theoretical_loss": 3.413597274901538, "tokens_seen": 2121990144 }, { "epoch": 0.28, "learning_rate": 0.0007261558357581636, "loss": 1.3017, "theoretical_loss": 3.4135364657257306, "tokens_seen": 2122448896 }, { "epoch": 0.28, "learning_rate": 0.0007258325250565794, "loss": 1.2776, "theoretical_loss": 3.4134669901218446, "tokens_seen": 2122973184 }, { "epoch": 0.28, "learning_rate": 0.0007255092143549952, "loss": 1.298, "theoretical_loss": 3.413397536476276, "tokens_seen": 2123497472 }, { "epoch": 0.28, "objective/train/advantage_avg": 0.009390114806592464, "objective/train/docs_used": 1201963, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.6426401138305664, "objective/train/original_loss": 2.642639636993408, "objective/train/theoretical_loss": 3.4133801764944227, "objective/train/tokens_used": 482487776, "objective/train/value_avg": -0.015869140625, "objective/train/value_loss": 0.001287141814827919, "objective/train/value_max": -0.0006666183471679688, "objective/train/value_min": -0.88671875, "objective/train/value_reward_corr": 0.599521333194416, "objective/train/value_std": 0.024932861328125, "objective/train/weight_avg": 1.0009453296661377, "objective/train/weighted_lm_loss": 2.645794630050659, "objective/train/weights_max": 1.0249836444854736, "objective/train/weights_min": 0.9224400520324707, "theoretical_loss": 3.4133801764944227, "tokens_seen": 2123628544 }, { "epoch": 0.28, "learning_rate": 0.0007251859036534109, "loss": 1.3058, "theoretical_loss": 3.4133281047766673, "tokens_seen": 2124021760 }, { "epoch": 0.28, "learning_rate": 0.0007248625929518268, "loss": 1.3147, "theoretical_loss": 3.4132586950106685, "tokens_seen": 2124546048 }, { "epoch": 0.28, "learning_rate": 0.0007245392822502424, "loss": 1.3235, "theoretical_loss": 3.4131893071659416, "tokens_seen": 2125070336 }, { "epoch": 0.28, "objective/train/advantage_avg": 0.015306500717997551, "objective/train/docs_used": 1202675, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.772237539291382, "objective/train/original_loss": 2.772237539291382, "objective/train/theoretical_loss": 3.4131632923732296, "objective/train/tokens_used": 484126176, "objective/train/value_avg": -0.0276641845703125, "objective/train/value_loss": 0.0024556652642786503, "objective/train/value_max": -0.0008864402770996094, "objective/train/value_min": -0.9892578125, "objective/train/value_reward_corr": 0.782518742807367, "objective/train/value_std": 0.07080078125, "objective/train/weight_avg": 1.0015429258346558, "objective/train/weighted_lm_loss": 2.776664972305298, "objective/train/weights_max": 1.0850067138671875, "objective/train/weights_min": 0.9127750992774963, "theoretical_loss": 3.4131632923732296, "tokens_seen": 2125266944 }, { "epoch": 0.28, "learning_rate": 0.0007242159715486582, "loss": 1.2822, "theoretical_loss": 3.413119941230156, "tokens_seen": 2125594624 }, { "epoch": 0.28, "learning_rate": 0.0007238926608470741, "loss": 1.273, "theoretical_loss": 3.4130505971909946, "tokens_seen": 2126118912 }, { "epoch": 0.28, "learning_rate": 0.0007235693501454898, "loss": 1.2745, "theoretical_loss": 3.412981275036147, "tokens_seen": 2126643200 }, { "epoch": 0.28, "objective/train/advantage_avg": 0.011406142264604568, "objective/train/docs_used": 1203810, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.601656436920166, "objective/train/original_loss": 2.601656436920166, "objective/train/theoretical_loss": 3.4129466221614972, "objective/train/tokens_used": 485764576, "objective/train/value_avg": -0.0155487060546875, "objective/train/value_loss": 0.0008396340417675674, "objective/train/value_max": -0.0006117820739746094, "objective/train/value_min": -0.67578125, "objective/train/value_reward_corr": 0.43474805050329246, "objective/train/value_std": 0.017120361328125, "objective/train/weight_avg": 1.0011446475982666, "objective/train/weighted_lm_loss": 2.604846715927124, "objective/train/weights_max": 1.028624176979065, "objective/train/weights_min": 0.9108483195304871, "theoretical_loss": 3.4129466221614972, "tokens_seen": 2126905344 }, { "epoch": 0.28, "learning_rate": 0.0007232460394439057, "loss": 1.3044, "theoretical_loss": 3.4129119747533143, "tokens_seen": 2127167488 }, { "epoch": 0.28, "learning_rate": 0.0007229227287423213, "loss": 1.3095, "theoretical_loss": 3.412842696330207, "tokens_seen": 2127691776 }, { "epoch": 0.28, "learning_rate": 0.0007225994180407371, "loss": 1.3154, "theoretical_loss": 3.412773439754547, "tokens_seen": 2128216064 }, { "epoch": 0.28, "objective/train/advantage_avg": 0.0060959854163229465, "objective/train/docs_used": 1204521, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.479057550430298, "objective/train/original_loss": 2.479057550430298, "objective/train/theoretical_loss": 3.4127301654837137, "objective/train/tokens_used": 487402976, "objective/train/value_avg": -0.0172882080078125, "objective/train/value_loss": 0.004024309106171131, "objective/train/value_max": -0.0008559226989746094, "objective/train/value_min": -0.94970703125, "objective/train/value_reward_corr": 0.5396947091171609, "objective/train/value_std": 0.042144775390625, "objective/train/weight_avg": 1.0006294250488281, "objective/train/weighted_lm_loss": 2.481194257736206, "objective/train/weights_max": 1.0767936706542969, "objective/train/weights_min": 0.9085661172866821, "theoretical_loss": 3.4127301654837137, "tokens_seen": 2128543744 }, { "epoch": 0.29, "learning_rate": 0.000722276107339153, "loss": 1.329, "theoretical_loss": 3.412704205014064, "tokens_seen": 2128740352 }, { "epoch": 0.29, "learning_rate": 0.0007219527966375687, "loss": 1.3259, "theoretical_loss": 3.412634992096499, "tokens_seen": 2129264640 }, { "epoch": 0.29, "learning_rate": 0.0007216294859359846, "loss": 1.3537, "theoretical_loss": 3.4125658009896016, "tokens_seen": 2129788928 }, { "epoch": 0.29, "objective/train/advantage_avg": 0.009903536178171635, "objective/train/docs_used": 1206012, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.4987432956695557, "objective/train/original_loss": 2.4987428188323975, "objective/train/theoretical_loss": 3.4125139219653167, "objective/train/tokens_used": 489041376, "objective/train/value_avg": -0.0167388916015625, "objective/train/value_loss": 0.0012915852712467313, "objective/train/value_max": -0.0007410049438476562, "objective/train/value_min": -0.78515625, "objective/train/value_reward_corr": 0.6206841825764304, "objective/train/value_std": 0.026580810546875, "objective/train/weight_avg": 1.000996708869934, "objective/train/weighted_lm_loss": 2.501779556274414, "objective/train/weights_max": 1.0431560277938843, "objective/train/weights_min": 0.9155706167221069, "theoretical_loss": 3.4125139219653167, "tokens_seen": 2130182144 }, { "epoch": 0.29, "learning_rate": 0.0007213061752344002, "loss": 1.319, "theoretical_loss": 3.412496631681133, "tokens_seen": 2130313216 }, { "epoch": 0.29, "learning_rate": 0.000720982864532816, "loss": 1.3138, "theoretical_loss": 3.4124274841588633, "tokens_seen": 2130837504 }, { "epoch": 0.29, "learning_rate": 0.0007206595538312319, "loss": 1.2597, "theoretical_loss": 3.412358358410573, "tokens_seen": 2131361792 }, { "epoch": 0.29, "objective/train/advantage_avg": 0.00888853520154953, "objective/train/docs_used": 1206751, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.0867550373077393, "objective/train/original_loss": 2.0867550373077393, "objective/train/theoretical_loss": 3.4122978912326865, "objective/train/tokens_used": 490679776, "objective/train/value_avg": -0.012847900390625, "objective/train/value_loss": 0.0001783570769475773, "objective/train/value_max": -0.0006537437438964844, "objective/train/value_min": -0.1842041015625, "objective/train/value_reward_corr": 0.27883839384976233, "objective/train/value_std": 0.00970458984375, "objective/train/weight_avg": 1.0008896589279175, "objective/train/weighted_lm_loss": 2.089038133621216, "objective/train/weights_max": 1.018401026725769, "objective/train/weights_min": 0.9960187077522278, "theoretical_loss": 3.4122978912326865, "tokens_seen": 2131820544 }, { "epoch": 0.29, "learning_rate": 0.0007203362431296476, "loss": 1.344, "theoretical_loss": 3.412289254424051, "tokens_seen": 2131886080 }, { "epoch": 0.29, "learning_rate": 0.0007200129324280634, "loss": 1.3066, "theoretical_loss": 3.412220172187098, "tokens_seen": 2132410368 }, { "epoch": 0.29, "learning_rate": 0.0007196896217264791, "loss": 1.3153, "theoretical_loss": 3.412151111687523, "tokens_seen": 2132934656 }, { "epoch": 0.29, "objective/train/advantage_avg": 0.008685121312737465, "objective/train/docs_used": 1207876, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.2673566341400146, "objective/train/original_loss": 2.2673566341400146, "objective/train/theoretical_loss": 3.4120820729131465, "objective/train/tokens_used": 492318176, "objective/train/value_avg": -0.0127105712890625, "objective/train/value_loss": 0.0004715832183137536, "objective/train/value_max": -0.00041413307189941406, "objective/train/value_min": -0.3916015625, "objective/train/value_reward_corr": 0.3554413034835681, "objective/train/value_std": 0.0122833251953125, "objective/train/weight_avg": 1.0008708238601685, "objective/train/weighted_lm_loss": 2.2696681022644043, "objective/train/weights_max": 1.025644063949585, "objective/train/weights_min": 0.9610297679901123, "theoretical_loss": 3.4120820729131465, "tokens_seen": 2133458944 }, { "epoch": 0.29, "learning_rate": 0.0007193663110248949, "loss": 1.2836, "theoretical_loss": 3.4120820729131465, "tokens_seen": 2133458944 }, { "epoch": 0.29, "learning_rate": 0.0007190430003233108, "loss": 1.2964, "theoretical_loss": 3.4120130558517965, "tokens_seen": 2133983232 }, { "epoch": 0.29, "learning_rate": 0.0007187196896217265, "loss": 1.3096, "theoretical_loss": 3.411944060491313, "tokens_seen": 2134507520 }, { "epoch": 0.29, "learning_rate": 0.0007183963789201423, "loss": 1.3358, "theoretical_loss": 3.4118750868195447, "tokens_seen": 2135031808 }, { "epoch": 0.29, "objective/train/advantage_avg": 0.011661229655146599, "objective/train/docs_used": 1208575, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.6299033164978027, "objective/train/original_loss": 2.6299030780792236, "objective/train/theoretical_loss": 3.4118664666349563, "objective/train/tokens_used": 493956576, "objective/train/value_avg": -0.0172271728515625, "objective/train/value_loss": 0.0010233805514872074, "objective/train/value_max": -0.0005359649658203125, "objective/train/value_min": -0.97509765625, "objective/train/value_reward_corr": 0.34094410729416813, "objective/train/value_std": 0.0238189697265625, "objective/train/weight_avg": 1.0011712312698364, "objective/train/weighted_lm_loss": 2.6337876319885254, "objective/train/weights_max": 1.0483232736587524, "objective/train/weights_min": 0.9335894584655762, "theoretical_loss": 3.4118664666349563, "tokens_seen": 2135097344 }, { "epoch": 0.29, "learning_rate": 0.000718073068218558, "loss": 1.276, "theoretical_loss": 3.4118061348243494, "tokens_seen": 2135556096 }, { "epoch": 0.29, "learning_rate": 0.0007177497575169738, "loss": 1.2858, "theoretical_loss": 3.411737204493597, "tokens_seen": 2136080384 }, { "epoch": 0.29, "learning_rate": 0.0007174264468153895, "loss": 1.2523, "theoretical_loss": 3.411668295815165, "tokens_seen": 2136604672 }, { "epoch": 0.29, "objective/train/advantage_avg": 0.008103215135633945, "objective/train/docs_used": 1209997, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.715740919113159, "objective/train/original_loss": 2.71574068069458, "objective/train/theoretical_loss": 3.411651072027313, "objective/train/tokens_used": 495594976, "objective/train/value_avg": -0.028839111328125, "objective/train/value_loss": 0.0023650238290429115, "objective/train/value_max": -0.0004992485046386719, "objective/train/value_min": -0.432373046875, "objective/train/value_reward_corr": 0.7994927895488375, "objective/train/value_std": 0.056365966796875, "objective/train/weight_avg": 1.0008220672607422, "objective/train/weighted_lm_loss": 2.7178854942321777, "objective/train/weights_max": 1.0302679538726807, "objective/train/weights_min": 0.9541273713111877, "theoretical_loss": 3.411651072027313, "tokens_seen": 2136735744 }, { "epoch": 0.29, "learning_rate": 0.0007171031361138054, "loss": 1.2928, "theoretical_loss": 3.411599408776942, "tokens_seen": 2137128960 }, { "epoch": 0.29, "learning_rate": 0.0007167798254122212, "loss": 1.339, "theoretical_loss": 3.4115305433668244, "tokens_seen": 2137653248 }, { "epoch": 0.29, "learning_rate": 0.0007164565147106369, "loss": 1.2853, "theoretical_loss": 3.4114616995727203, "tokens_seen": 2138177536 }, { "epoch": 0.29, "objective/train/advantage_avg": 0.011575371026992798, "objective/train/docs_used": 1210838, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.9362101554870605, "objective/train/original_loss": 2.9362101554870605, "objective/train/theoretical_loss": 3.4114358887203435, "objective/train/tokens_used": 497233376, "objective/train/value_avg": -0.0297088623046875, "objective/train/value_loss": 0.003369653830304742, "objective/train/value_max": -0.0007181167602539062, "objective/train/value_min": -0.8232421875, "objective/train/value_reward_corr": 0.6242811771666695, "objective/train/value_std": 0.05950927734375, "objective/train/weight_avg": 1.0011742115020752, "objective/train/weighted_lm_loss": 2.9397213459014893, "objective/train/weights_max": 1.0805566310882568, "objective/train/weights_min": 0.9242470264434814, "theoretical_loss": 3.4114358887203435, "tokens_seen": 2138374144 }, { "epoch": 0.29, "learning_rate": 0.0007161332040090527, "loss": 1.3031, "theoretical_loss": 3.4113928773825473, "tokens_seen": 2138701824 }, { "epoch": 0.29, "learning_rate": 0.0007158098933074684, "loss": 1.3098, "theoretical_loss": 3.411324076784232, "tokens_seen": 2139226112 }, { "epoch": 0.29, "learning_rate": 0.0007154865826058843, "loss": 1.2746, "theoretical_loss": 3.4112552977657105, "tokens_seen": 2139750400 }, { "epoch": 0.29, "objective/train/advantage_avg": 0.00795222818851471, "objective/train/docs_used": 1212386, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.4948108196258545, "objective/train/original_loss": 2.4948108196258545, "objective/train/theoretical_loss": 3.4112209163451057, "objective/train/tokens_used": 498871776, "objective/train/value_avg": -0.020660400390625, "objective/train/value_loss": 0.004307425580918789, "objective/train/value_max": -0.0007066726684570312, "objective/train/value_min": -0.98681640625, "objective/train/value_reward_corr": 0.5029849102583246, "objective/train/value_std": 0.0386962890625, "objective/train/weight_avg": 1.0008163452148438, "objective/train/weighted_lm_loss": 2.4971253871917725, "objective/train/weights_max": 1.1010035276412964, "objective/train/weights_min": 0.907378077507019, "theoretical_loss": 3.4112209163451057, "tokens_seen": 2140012544 }, { "epoch": 0.29, "learning_rate": 0.0007151632719043001, "loss": 1.2771, "theoretical_loss": 3.41118654031493, "tokens_seen": 2140274688 }, { "epoch": 0.29, "learning_rate": 0.0007148399612027157, "loss": 1.3134, "theoretical_loss": 3.411117804419846, "tokens_seen": 2140798976 }, { "epoch": 0.29, "learning_rate": 0.0007145166505011316, "loss": 1.2771, "theoretical_loss": 3.411049090068424, "tokens_seen": 2141323264 }, { "epoch": 0.29, "objective/train/advantage_avg": 0.00037012159009464085, "objective/train/docs_used": 1213144, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.5333492755889893, "objective/train/original_loss": 2.5333495140075684, "objective/train/theoretical_loss": 3.411006154533583, "objective/train/tokens_used": 500510176, "objective/train/value_avg": -0.0211639404296875, "objective/train/value_loss": 0.0020064725540578365, "objective/train/value_max": -0.0009889602661132812, "objective/train/value_min": -0.35791015625, "objective/train/value_reward_corr": 0.6959641393967186, "objective/train/value_std": 0.0257720947265625, "objective/train/weight_avg": 1.000046968460083, "objective/train/weighted_lm_loss": 2.534590244293213, "objective/train/weights_max": 1.0212271213531494, "objective/train/weights_min": 0.9287452101707458, "theoretical_loss": 3.411006154533583, "tokens_seen": 2141650944 }, { "epoch": 0.29, "learning_rate": 0.0007141933397995473, "loss": 1.2827, "theoretical_loss": 3.4109803972486397, "tokens_seen": 2141847552 }, { "epoch": 0.29, "learning_rate": 0.0007138700290979632, "loss": 1.285, "theoretical_loss": 3.410911725948477, "tokens_seen": 2142371840 }, { "epoch": 0.29, "learning_rate": 0.000713546718396379, "loss": 1.3262, "theoretical_loss": 3.410843076155932, "tokens_seen": 2142896128 }, { "epoch": 0.29, "objective/train/advantage_avg": -0.0006150956614874303, "objective/train/docs_used": 1213818, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.6917524337768555, "objective/train/original_loss": 2.6917521953582764, "objective/train/theoretical_loss": 3.4107916029186804, "objective/train/tokens_used": 502148576, "objective/train/value_avg": -0.01396942138671875, "objective/train/value_loss": 0.003098409855738282, "objective/train/value_max": -0.0006823539733886719, "objective/train/value_min": -0.853515625, "objective/train/value_reward_corr": 0.5283699176392359, "objective/train/value_std": 0.0213623046875, "objective/train/weight_avg": 0.9999536871910095, "objective/train/weighted_lm_loss": 2.6923580169677734, "objective/train/weights_max": 1.0358282327651978, "objective/train/weights_min": 0.9253856539726257, "theoretical_loss": 3.4107916029186804, "tokens_seen": 2143289344 }, { "epoch": 0.29, "learning_rate": 0.0007132234076947947, "loss": 1.2695, "theoretical_loss": 3.4107744478590085, "tokens_seen": 2143420416 }, { "epoch": 0.29, "learning_rate": 0.0007129000969932105, "loss": 1.2837, "theoretical_loss": 3.4107058410457194, "tokens_seen": 2143944704 }, { "epoch": 0.29, "learning_rate": 0.0007125767862916262, "loss": 1.289, "theoretical_loss": 3.4106372557040894, "tokens_seen": 2144468992 }, { "epoch": 0.29, "objective/train/advantage_avg": 0.010809112340211868, "objective/train/docs_used": 1215022, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.602994918823242, "objective/train/original_loss": 2.6029951572418213, "objective/train/theoretical_loss": 3.4105772611342253, "objective/train/tokens_used": 503786976, "objective/train/value_avg": -0.01776123046875, "objective/train/value_loss": 0.0008168371859937906, "objective/train/value_max": -0.0005679130554199219, "objective/train/value_min": -0.77685546875, "objective/train/value_reward_corr": 0.3830118525399673, "objective/train/value_std": 0.0203704833984375, "objective/train/weight_avg": 1.0010850429534912, "objective/train/weighted_lm_loss": 2.606508731842041, "objective/train/weights_max": 1.0370547771453857, "objective/train/weights_min": 0.9606934189796448, "theoretical_loss": 3.4105772611342253, "tokens_seen": 2144927744 }, { "epoch": 0.3, "learning_rate": 0.0007122534755900421, "loss": 1.2889, "theoretical_loss": 3.410568691822151, "tokens_seen": 2144993280 }, { "epoch": 0.3, "learning_rate": 0.0007119301648884579, "loss": 1.322, "theoretical_loss": 3.410500149387947, "tokens_seen": 2145517568 }, { "epoch": 0.3, "learning_rate": 0.0007116068541868736, "loss": 1.2858, "theoretical_loss": 3.4104316283895297, "tokens_seen": 2146041856 }, { "epoch": 0.3, "objective/train/advantage_avg": 0.0025292846839874983, "objective/train/docs_used": 1215651, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.515333414077759, "objective/train/original_loss": 2.5153331756591797, "objective/train/theoretical_loss": 3.41036312881496, "objective/train/tokens_used": 505425376, "objective/train/value_avg": -0.0265350341796875, "objective/train/value_loss": 0.005545430816709995, "objective/train/value_max": -0.0009074211120605469, "objective/train/value_min": -0.9765625, "objective/train/value_reward_corr": 0.7043284248230662, "objective/train/value_std": 0.06024169921875, "objective/train/weight_avg": 1.0002801418304443, "objective/train/weighted_lm_loss": 2.5168094635009766, "objective/train/weights_max": 1.0467665195465088, "objective/train/weights_min": 0.9136062264442444, "theoretical_loss": 3.41036312881496, "tokens_seen": 2146566144 }, { "epoch": 0.3, "learning_rate": 0.0007112835434852894, "loss": 1.2883, "theoretical_loss": 3.41036312881496, "tokens_seen": 2146566144 }, { "epoch": 0.3, "learning_rate": 0.0007109602327837051, "loss": 1.2906, "theoretical_loss": 3.410294650652311, "tokens_seen": 2147090432 }, { "epoch": 0.3, "learning_rate": 0.0007106369220821209, "loss": 1.3211, "theoretical_loss": 3.4102261938896623, "tokens_seen": 2147614720 }, { "epoch": 0.3, "learning_rate": 0.0007103136113805368, "loss": 1.3004, "theoretical_loss": 3.410157758515105, "tokens_seen": 2148139008 }, { "epoch": 0.3, "objective/train/advantage_avg": 0.011117985472083092, "objective/train/docs_used": 1216251, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.679807662963867, "objective/train/original_loss": 2.679807662963867, "objective/train/theoretical_loss": 3.410149205596542, "objective/train/tokens_used": 507063776, "objective/train/value_avg": -0.016754150390625, "objective/train/value_loss": 0.0005519221303984523, "objective/train/value_max": -0.0009288787841796875, "objective/train/value_min": -0.251953125, "objective/train/value_reward_corr": 0.31742491446004356, "objective/train/value_std": 0.01505279541015625, "objective/train/weight_avg": 1.0011144876480103, "objective/train/weighted_lm_loss": 2.6834394931793213, "objective/train/weights_max": 1.0222713947296143, "objective/train/weights_min": 0.9718999266624451, "theoretical_loss": 3.410149205596542, "tokens_seen": 2148204544 }, { "epoch": 0.3, "learning_rate": 0.0007099903006789525, "loss": 1.2807, "theoretical_loss": 3.410089344516738, "tokens_seen": 2148663296 }, { "epoch": 0.3, "learning_rate": 0.0007096669899773683, "loss": 1.2931, "theoretical_loss": 3.4100209518826725, "tokens_seen": 2149187584 }, { "epoch": 0.3, "learning_rate": 0.000709343679275784, "loss": 1.3141, "theoretical_loss": 3.409952580601027, "tokens_seen": 2149711872 }, { "epoch": 0.3, "objective/train/advantage_avg": 0.007817800156772137, "objective/train/docs_used": 1217415, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.569763660430908, "objective/train/original_loss": 2.5697638988494873, "objective/train/theoretical_loss": 3.4099354911155397, "objective/train/tokens_used": 508702176, "objective/train/value_avg": -0.01389312744140625, "objective/train/value_loss": 0.0014025191776454449, "objective/train/value_max": -0.0006613731384277344, "objective/train/value_min": -0.5888671875, "objective/train/value_reward_corr": 0.4605757294630158, "objective/train/value_std": 0.01812744140625, "objective/train/weight_avg": 1.000788688659668, "objective/train/weighted_lm_loss": 2.572129011154175, "objective/train/weights_max": 1.0218210220336914, "objective/train/weights_min": 0.9164601564407349, "theoretical_loss": 3.4099354911155397, "tokens_seen": 2149842944 }, { "epoch": 0.3, "learning_rate": 0.0007090203685741998, "loss": 1.2785, "theoretical_loss": 3.4098842306599293, "tokens_seen": 2150236160 }, { "epoch": 0.3, "learning_rate": 0.0007086970578726157, "loss": 1.27, "theoretical_loss": 3.4098159020475185, "tokens_seen": 2150760448 }, { "epoch": 0.3, "learning_rate": 0.0007083737471710314, "loss": 1.3171, "theoretical_loss": 3.4097475947519413, "tokens_seen": 2151284736 }, { "epoch": 0.3, "objective/train/advantage_avg": 0.0035357512533664703, "objective/train/docs_used": 1217907, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.342170238494873, "objective/train/original_loss": 2.342170476913452, "objective/train/theoretical_loss": 3.4097219850094285, "objective/train/tokens_used": 510340576, "objective/train/value_avg": -0.01534271240234375, "objective/train/value_loss": 0.0018321318784728646, "objective/train/value_max": -0.0005955696105957031, "objective/train/value_min": -0.64013671875, "objective/train/value_reward_corr": 0.28779056245321327, "objective/train/value_std": 0.0148773193359375, "objective/train/weight_avg": 1.0003626346588135, "objective/train/weighted_lm_loss": 2.3429887294769287, "objective/train/weights_max": 1.0203477144241333, "objective/train/weights_min": 0.950828492641449, "theoretical_loss": 3.4097219850094285, "tokens_seen": 2151481344 }, { "epoch": 0.3, "learning_rate": 0.0007080504364694471, "loss": 1.3125, "theoretical_loss": 3.409679308761355, "tokens_seen": 2151809024 }, { "epoch": 0.3, "learning_rate": 0.0007077271257678629, "loss": 1.3363, "theoretical_loss": 3.409611044063926, "tokens_seen": 2152333312 }, { "epoch": 0.3, "learning_rate": 0.0007074038150662787, "loss": 1.3186, "theoretical_loss": 3.40954280064783, "tokens_seen": 2152857600 }, { "debugging/Self-BLEU-5": 0.41444976139630557, "debugging/distinct-1-grams": 0.7932536337909882, "debugging/distinct-2-grams": 0.9579868635707071, "debugging/entropy-1-grams": 5.725708682766013, "debugging/entropy-2-grams": 6.52068183489502, "debugging/length": 484.6, "debugging/num_segments": 10, "debugging/raw_token_scores_avg": 0.008234597742557526, "debugging/raw_token_scores_std": 0.05009995773434639, "epoch": 0.3, "objective/train/advantage_avg": 0.00925829540938139, "objective/train/docs_used": 1219413, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.7713656425476074, "objective/train/original_loss": 2.771365165710449, "objective/train/theoretical_loss": 3.40950868691659, "objective/train/tokens_used": 511978976, "objective/train/value_avg": -0.017486572265625, "objective/train/value_loss": 0.0020895609632134438, "objective/train/value_max": -0.0003542900085449219, "objective/train/value_min": -0.888671875, "objective/train/value_reward_corr": 0.44994540275985206, "objective/train/value_std": 0.0233306884765625, "objective/train/weight_avg": 1.0009361505508423, "objective/train/weighted_lm_loss": 2.7740695476531982, "objective/train/weights_max": 1.0398285388946533, "objective/train/weights_min": 0.909256100654602, "theoretical_loss": 3.40950868691659, "tokens_seen": 2153119744 }, { "epoch": 0.3, "learning_rate": 0.0007070805043646946, "loss": 1.267, "theoretical_loss": 3.409474578501253, "tokens_seen": 2153381888 }, { "epoch": 0.3, "learning_rate": 0.0007067571936631103, "loss": 1.3262, "theoretical_loss": 3.409406377612389, "tokens_seen": 2153906176 }, { "epoch": 0.3, "learning_rate": 0.000706433882961526, "loss": 1.2986, "theoretical_loss": 3.4093381979694435, "tokens_seen": 2154430464 }, { "epoch": 0.3, "objective/train/advantage_avg": 0.008791059255599976, "objective/train/docs_used": 1220074, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.199626922607422, "objective/train/original_loss": 2.1996266841888428, "objective/train/theoretical_loss": 3.4092955964763068, "objective/train/tokens_used": 513617376, "objective/train/value_avg": -0.0251312255859375, "objective/train/value_loss": 0.0034104851074516773, "objective/train/value_max": -0.0009546279907226562, "objective/train/value_min": -0.830078125, "objective/train/value_reward_corr": 0.6943895815467057, "objective/train/value_std": 0.041534423828125, "objective/train/weight_avg": 1.0008959770202637, "objective/train/weighted_lm_loss": 2.202648162841797, "objective/train/weights_max": 1.037733793258667, "objective/train/weights_min": 0.9099645614624023, "theoretical_loss": 3.4092955964763068, "tokens_seen": 2154758144 }, { "epoch": 0.3, "learning_rate": 0.0007061105722599418, "loss": 1.3174, "theoretical_loss": 3.4092700395606284, "tokens_seen": 2154954752 }, { "epoch": 0.3, "learning_rate": 0.0007057872615583576, "loss": 1.3622, "theoretical_loss": 3.4092019023741678, "tokens_seen": 2155479040 }, { "epoch": 0.3, "learning_rate": 0.0007054639508567733, "loss": 1.2941, "theoretical_loss": 3.409133786398294, "tokens_seen": 2156003328 }, { "epoch": 0.3, "objective/train/advantage_avg": 0.0061523690819740295, "objective/train/docs_used": 1221515, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.50469708442688, "objective/train/original_loss": 2.504697561264038, "objective/train/theoretical_loss": 3.4090827133287624, "objective/train/tokens_used": 515255776, "objective/train/value_avg": -0.019439697265625, "objective/train/value_loss": 0.003793556010350585, "objective/train/value_max": -0.00033020973205566406, "objective/train/value_min": -0.9716796875, "objective/train/value_reward_corr": 0.6807903401239289, "objective/train/value_std": 0.05133056640625, "objective/train/weight_avg": 1.000633955001831, "objective/train/weighted_lm_loss": 2.5067362785339355, "objective/train/weights_max": 1.09394109249115, "objective/train/weights_min": 0.9084156155586243, "theoretical_loss": 3.4090827133287624, "tokens_seen": 2156396544 }, { "epoch": 0.3, "learning_rate": 0.0007051406401551892, "loss": 1.3014, "theoretical_loss": 3.4090656916212483, "tokens_seen": 2156527616 }, { "epoch": 0.3, "learning_rate": 0.0007048173294536049, "loss": 1.3259, "theoretical_loss": 3.4089976180312833, "tokens_seen": 2157051904 }, { "epoch": 0.3, "learning_rate": 0.0007044940187520207, "loss": 1.2796, "theoretical_loss": 3.4089295656166576, "tokens_seen": 2157576192 }, { "epoch": 0.3, "objective/train/advantage_avg": 0.00644260598346591, "objective/train/docs_used": 1222265, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.4791462421417236, "objective/train/original_loss": 2.4791464805603027, "objective/train/theoretical_loss": 3.408870037115035, "objective/train/tokens_used": 516894176, "objective/train/value_avg": -0.01534271240234375, "objective/train/value_loss": 0.0015596000012010336, "objective/train/value_max": -0.0004458427429199219, "objective/train/value_min": -0.8359375, "objective/train/value_reward_corr": 0.2812658411197976, "objective/train/value_std": 0.0211944580078125, "objective/train/weight_avg": 1.0006519556045532, "objective/train/weighted_lm_loss": 2.4810547828674316, "objective/train/weights_max": 1.0632386207580566, "objective/train/weights_min": 0.9414373636245728, "theoretical_loss": 3.408870037115035, "tokens_seen": 2158034944 }, { "epoch": 0.3, "learning_rate": 0.0007041707080504365, "loss": 1.2501, "theoretical_loss": 3.4088615343656423, "tokens_seen": 2158100480 }, { "epoch": 0.3, "learning_rate": 0.0007038473973488522, "loss": 1.35, "theoretical_loss": 3.4087935242665166, "tokens_seen": 2158624768 }, { "epoch": 0.3, "learning_rate": 0.0007035240866472681, "loss": 1.3037, "theoretical_loss": 3.408725535307569, "tokens_seen": 2159149056 }, { "epoch": 0.3, "objective/train/advantage_avg": 0.00877588614821434, "objective/train/docs_used": 1223379, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.4261960983276367, "objective/train/original_loss": 2.4261953830718994, "objective/train/theoretical_loss": 3.408657567477097, "objective/train/tokens_used": 518532576, "objective/train/value_avg": -0.01261138916015625, "objective/train/value_loss": 0.0005217537982389331, "objective/train/value_max": -0.00045299530029296875, "objective/train/value_min": -0.37158203125, "objective/train/value_reward_corr": 0.37934224171784414, "objective/train/value_std": 0.014434814453125, "objective/train/weight_avg": 1.0008801221847534, "objective/train/weighted_lm_loss": 2.428795337677002, "objective/train/weights_max": 1.0279020071029663, "objective/train/weights_min": 0.9546464085578918, "theoretical_loss": 3.408657567477097, "tokens_seen": 2159673344 }, { "epoch": 0.3, "learning_rate": 0.0007032007759456838, "loss": 1.3165, "theoretical_loss": 3.408657567477097, "tokens_seen": 2159673344 }, { "epoch": 0.3, "learning_rate": 0.0007028774652440996, "loss": 1.2989, "theoretical_loss": 3.4085896207634083, "tokens_seen": 2160197632 }, { "epoch": 0.3, "learning_rate": 0.0007025541545425154, "loss": 1.2853, "theoretical_loss": 3.408521695154819, "tokens_seen": 2160721920 }, { "epoch": 0.3, "learning_rate": 0.0007022308438409311, "loss": 1.3131, "theoretical_loss": 3.4084537906396557, "tokens_seen": 2161246208 }, { "epoch": 0.3, "objective/train/advantage_avg": 0.006182631477713585, "objective/train/docs_used": 1223894, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.4515135288238525, "objective/train/original_loss": 2.4515135288238525, "objective/train/theoretical_loss": 3.4084453040578104, "objective/train/tokens_used": 520170976, "objective/train/value_avg": -0.01910400390625, "objective/train/value_loss": 0.002478188369423151, "objective/train/value_max": -0.0002472400665283203, "objective/train/value_min": -0.58544921875, "objective/train/value_reward_corr": 0.2596886640791908, "objective/train/value_std": 0.0160369873046875, "objective/train/weight_avg": 1.000630497932434, "objective/train/weighted_lm_loss": 2.4539577960968018, "objective/train/weights_max": 1.0242441892623901, "objective/train/weights_min": 0.9162093997001648, "theoretical_loss": 3.4084453040578104, "tokens_seen": 2161311744 }, { "epoch": 0.31, "learning_rate": 0.000701907533139347, "loss": 1.3032, "theoretical_loss": 3.408385907206253, "tokens_seen": 2161770496 }, { "epoch": 0.31, "learning_rate": 0.0007015842224377627, "loss": 1.3, "theoretical_loss": 3.4083180448429546, "tokens_seen": 2162294784 }, { "epoch": 0.31, "learning_rate": 0.0007012609117361784, "loss": 1.3103, "theoretical_loss": 3.408250203538116, "tokens_seen": 2162819072 }, { "epoch": 0.31, "objective/train/advantage_avg": 0.009293771348893642, "objective/train/docs_used": 1225280, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.6205313205718994, "objective/train/original_loss": 2.6205310821533203, "objective/train/theoretical_loss": 3.408233246500926, "objective/train/tokens_used": 521809376, "objective/train/value_avg": -0.01396942138671875, "objective/train/value_loss": 0.0009817081736400723, "objective/train/value_max": -0.0005660057067871094, "objective/train/value_min": -0.57568359375, "objective/train/value_reward_corr": 0.3787201607767209, "objective/train/value_std": 0.024627685546875, "objective/train/weight_avg": 1.0009342432022095, "objective/train/weighted_lm_loss": 2.623688220977783, "objective/train/weights_max": 1.0561764240264893, "objective/train/weights_min": 0.9389215111732483, "theoretical_loss": 3.408233246500926, "tokens_seen": 2162950144 }, { "epoch": 0.31, "learning_rate": 0.0007009376010345943, "loss": 1.3122, "theoretical_loss": 3.4081823832800984, "tokens_seen": 2163343360 }, { "epoch": 0.31, "learning_rate": 0.00070061429033301, "loss": 1.3061, "theoretical_loss": 3.4081145840572744, "tokens_seen": 2163867648 }, { "epoch": 0.31, "learning_rate": 0.0007002909796314259, "loss": 1.3246, "theoretical_loss": 3.4080468058580258, "tokens_seen": 2164391936 }, { "epoch": 0.31, "objective/train/advantage_avg": -0.00690333591774106, "objective/train/docs_used": 1225734, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.2437360286712646, "objective/train/original_loss": 2.2437360286712646, "objective/train/theoretical_loss": 3.4080213944510787, "objective/train/tokens_used": 523447776, "objective/train/value_avg": -0.0238189697265625, "objective/train/value_loss": 0.006729920394718647, "objective/train/value_max": -0.00075531005859375, "objective/train/value_min": -0.9365234375, "objective/train/value_reward_corr": 0.6572844654549284, "objective/train/value_std": 0.04510498046875, "objective/train/weight_avg": 0.9993427991867065, "objective/train/weighted_lm_loss": 2.2448108196258545, "objective/train/weights_max": 1.079404592514038, "objective/train/weights_min": 0.9088374376296997, "theoretical_loss": 3.4080213944510787, "tokens_seen": 2164588544 }, { "epoch": 0.31, "learning_rate": 0.0006999676689298415, "loss": 1.3387, "theoretical_loss": 3.407979048670743, "tokens_seen": 2164916224 }, { "epoch": 0.31, "learning_rate": 0.0006996443582282573, "loss": 1.3158, "theoretical_loss": 3.4079113124838263, "tokens_seen": 2165440512 }, { "epoch": 0.31, "learning_rate": 0.0006993210475266732, "loss": 1.3091, "theoretical_loss": 3.407843597285684, "tokens_seen": 2165964800 }, { "epoch": 0.31, "objective/train/advantage_avg": 0.00469951331615448, "objective/train/docs_used": 1227178, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.7058019638061523, "objective/train/original_loss": 2.7058017253875732, "objective/train/theoretical_loss": 3.407809747553784, "objective/train/tokens_used": 525086176, "objective/train/value_avg": -0.023956298828125, "objective/train/value_loss": 0.0042751505970954895, "objective/train/value_max": -0.0005130767822265625, "objective/train/value_min": -0.9931640625, "objective/train/value_reward_corr": 0.49582911619540765, "objective/train/value_std": 0.036834716796875, "objective/train/weight_avg": 1.0004911422729492, "objective/train/weighted_lm_loss": 2.709228754043579, "objective/train/weights_max": 1.0818318128585815, "objective/train/weights_min": 0.9076187014579773, "theoretical_loss": 3.407809747553784, "tokens_seen": 2166226944 }, { "epoch": 0.31, "learning_rate": 0.0006989977368250889, "loss": 1.3291, "theoretical_loss": 3.407775903064735, "tokens_seen": 2166489088 }, { "epoch": 0.31, "learning_rate": 0.0006986744261235047, "loss": 1.3354, "theoretical_loss": 3.4077082298094066, "tokens_seen": 2167013376 }, { "epoch": 0.31, "learning_rate": 0.0006983511154219206, "loss": 1.3201, "theoretical_loss": 3.4076405775081353, "tokens_seen": 2167537664 }, { "epoch": 0.31, "objective/train/advantage_avg": 0.006472716573625803, "objective/train/docs_used": 1227923, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.8816802501678467, "objective/train/original_loss": 2.881680727005005, "objective/train/theoretical_loss": 3.407598305455439, "objective/train/tokens_used": 526724576, "objective/train/value_avg": -0.01461029052734375, "objective/train/value_loss": 0.0016986647387966514, "objective/train/value_max": -0.0006880760192871094, "objective/train/value_min": -0.87841796875, "objective/train/value_reward_corr": 0.3987265015143652, "objective/train/value_std": 0.019683837890625, "objective/train/weight_avg": 1.0006556510925293, "objective/train/weighted_lm_loss": 2.883932113647461, "objective/train/weights_max": 1.0399051904678345, "objective/train/weights_min": 0.9354196786880493, "theoretical_loss": 3.407598305455439, "tokens_seen": 2167865344 }, { "epoch": 0.31, "learning_rate": 0.0006980278047203362, "loss": 1.3125, "theoretical_loss": 3.407572946149367, "tokens_seen": 2168061952 }, { "epoch": 0.31, "learning_rate": 0.0006977044940187521, "loss": 1.3208, "theoretical_loss": 3.407505335721557, "tokens_seen": 2168586240 }, { "epoch": 0.31, "learning_rate": 0.0006973811833171678, "loss": 1.3159, "theoretical_loss": 3.4074377462131684, "tokens_seen": 2169110528 }, { "epoch": 0.31, "objective/train/advantage_avg": 0.002176760695874691, "objective/train/docs_used": 1229408, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 3.0809249877929688, "objective/train/original_loss": 3.080925226211548, "objective/train/theoretical_loss": 3.407387067803314, "objective/train/tokens_used": 528362976, "objective/train/value_avg": -0.017822265625, "objective/train/value_loss": 0.004524109419435263, "objective/train/value_max": -0.0006093978881835938, "objective/train/value_min": -0.98486328125, "objective/train/value_reward_corr": 0.40765240583261975, "objective/train/value_std": 0.034759521484375, "objective/train/weight_avg": 1.0002399682998657, "objective/train/weighted_lm_loss": 3.0821919441223145, "objective/train/weights_max": 1.086098313331604, "objective/train/weights_min": 0.9065372943878174, "theoretical_loss": 3.407387067803314, "tokens_seen": 2169503744 }, { "epoch": 0.31, "learning_rate": 0.0006970578726155836, "loss": 1.3176, "theoretical_loss": 3.407370177612676, "tokens_seen": 2169634816 }, { "epoch": 0.31, "learning_rate": 0.0006967345619139994, "loss": 1.3147, "theoretical_loss": 3.4073026299085614, "tokens_seen": 2170159104 }, { "epoch": 0.31, "learning_rate": 0.0006964112512124151, "loss": 1.3217, "theoretical_loss": 3.4072351030893158, "tokens_seen": 2170683392 }, { "epoch": 0.31, "objective/train/advantage_avg": 0.01105399988591671, "objective/train/docs_used": 1230033, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.610478639602661, "objective/train/original_loss": 2.6104788780212402, "objective/train/theoretical_loss": 3.407176034245555, "objective/train/tokens_used": 530001376, "objective/train/value_avg": -0.0262298583984375, "objective/train/value_loss": 0.003223809413611889, "objective/train/value_max": -0.0009112358093261719, "objective/train/value_min": -0.94970703125, "objective/train/value_reward_corr": 0.444214449946167, "objective/train/value_std": 0.033447265625, "objective/train/weight_avg": 1.001121163368225, "objective/train/weighted_lm_loss": 2.6134793758392334, "objective/train/weights_max": 1.0760308504104614, "objective/train/weights_min": 0.9127951860427856, "theoretical_loss": 3.407176034245555, "tokens_seen": 2171142144 }, { "epoch": 0.31, "learning_rate": 0.0006960879405108309, "loss": 1.2985, "theoretical_loss": 3.4071675971434403, "tokens_seen": 2171207680 }, { "epoch": 0.31, "learning_rate": 0.0006957646298092467, "loss": 1.3265, "theoretical_loss": 3.4071001120594446, "tokens_seen": 2171731968 }, { "epoch": 0.31, "learning_rate": 0.0006954413191076625, "loss": 1.3463, "theoretical_loss": 3.407032647825848, "tokens_seen": 2172256256 }, { "epoch": 0.31, "objective/train/advantage_avg": 0.0038341134786605835, "objective/train/docs_used": 1231439, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.9162070751190186, "objective/train/original_loss": 2.9162073135375977, "objective/train/theoretical_loss": 3.406965204431178, "objective/train/tokens_used": 531639776, "objective/train/value_avg": -0.0148773193359375, "objective/train/value_loss": 0.0009583953651599586, "objective/train/value_max": -0.001056671142578125, "objective/train/value_min": -0.1961669921875, "objective/train/value_reward_corr": 0.6011444940214272, "objective/train/value_std": 0.0158843994140625, "objective/train/weight_avg": 1.0003881454467773, "objective/train/weighted_lm_loss": 2.918799638748169, "objective/train/weights_max": 1.0181487798690796, "objective/train/weights_min": 0.9694803357124329, "theoretical_loss": 3.406965204431178, "tokens_seen": 2172780544 }, { "epoch": 0.31, "learning_rate": 0.0006951180084060783, "loss": 1.3315, "theoretical_loss": 3.406965204431178, "tokens_seen": 2172780544 }, { "epoch": 0.31, "learning_rate": 0.000694794697704494, "loss": 1.289, "theoretical_loss": 3.406897781863971, "tokens_seen": 2173304832 }, { "epoch": 0.31, "learning_rate": 0.0006944713870029098, "loss": 1.3547, "theoretical_loss": 3.4068303801127744, "tokens_seen": 2173829120 }, { "epoch": 0.31, "learning_rate": 0.0006941480763013256, "loss": 1.2928, "theoretical_loss": 3.4067629991661423, "tokens_seen": 2174353408 }, { "epoch": 0.31, "objective/train/advantage_avg": 0.010608423501253128, "objective/train/docs_used": 1232037, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.459190607070923, "objective/train/original_loss": 2.459190607070923, "objective/train/theoretical_loss": 3.4067545780100645, "objective/train/tokens_used": 533278176, "objective/train/value_avg": -0.0183258056640625, "objective/train/value_loss": 0.0006396312383003533, "objective/train/value_max": -0.0004494190216064453, "objective/train/value_min": -0.34521484375, "objective/train/value_reward_corr": 0.38776251276492885, "objective/train/value_std": 0.0201873779296875, "objective/train/weight_avg": 1.0010640621185303, "objective/train/weighted_lm_loss": 2.4626080989837646, "objective/train/weights_max": 1.0350611209869385, "objective/train/weights_min": 0.9668741226196289, "theoretical_loss": 3.4067545780100645, "tokens_seen": 2174418944 }, { "epoch": 0.31, "learning_rate": 0.0006938247655997414, "loss": 1.3349, "theoretical_loss": 3.4066956390126397, "tokens_seen": 2174877696 }, { "epoch": 0.31, "learning_rate": 0.0006935014548981572, "loss": 1.3156, "theoretical_loss": 3.4066282996408392, "tokens_seen": 2175401984 }, { "epoch": 0.31, "learning_rate": 0.0006931781441965729, "loss": 1.3535, "theoretical_loss": 3.4065609810393234, "tokens_seen": 2175926272 }, { "epoch": 0.31, "objective/train/advantage_avg": 0.005105558317154646, "objective/train/docs_used": 1233356, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.530858278274536, "objective/train/original_loss": 2.5308585166931152, "objective/train/theoretical_loss": 3.4065441546329644, "objective/train/tokens_used": 534916576, "objective/train/value_avg": -0.0165557861328125, "objective/train/value_loss": 0.0020301544573158026, "objective/train/value_max": -0.0005617141723632812, "objective/train/value_min": -0.1519775390625, "objective/train/value_reward_corr": 0.32478841714776047, "objective/train/value_std": 0.01522064208984375, "objective/train/weight_avg": 1.0005205869674683, "objective/train/weighted_lm_loss": 2.532424211502075, "objective/train/weights_max": 1.012444019317627, "objective/train/weights_min": 0.9342984557151794, "theoretical_loss": 3.4065441546329644, "tokens_seen": 2176057344 }, { "epoch": 0.31, "learning_rate": 0.0006928548334949886, "loss": 1.2796, "theoretical_loss": 3.4064936831966834, "tokens_seen": 2176450560 }, { "epoch": 0.31, "learning_rate": 0.0006925315227934045, "loss": 1.3059, "theoretical_loss": 3.40642640610152, "tokens_seen": 2176974848 }, { "epoch": 0.31, "learning_rate": 0.0006922082120918203, "loss": 1.3211, "theoretical_loss": 3.406359149742442, "tokens_seen": 2177499136 }, { "epoch": 0.31, "objective/train/advantage_avg": 0.02451823279261589, "objective/train/docs_used": 1233682, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 3.0390465259552, "objective/train/original_loss": 3.0390467643737793, "objective/train/theoretical_loss": 3.406333933951487, "objective/train/tokens_used": 536554976, "objective/train/value_avg": -0.06103515625, "objective/train/value_loss": 0.009237349033355713, "objective/train/value_max": -0.0004189014434814453, "objective/train/value_min": -0.9921875, "objective/train/value_reward_corr": 0.7399589489678524, "objective/train/value_std": 0.13671875, "objective/train/weight_avg": 1.002497911453247, "objective/train/weighted_lm_loss": 3.0448403358459473, "objective/train/weights_max": 1.1042418479919434, "objective/train/weights_min": 0.9073794484138489, "theoretical_loss": 3.406333933951487, "tokens_seen": 2177695744 }, { "epoch": 0.32, "learning_rate": 0.000691884901390236, "loss": 1.3273, "theoretical_loss": 3.406291914108068, "tokens_seen": 2178023424 }, { "epoch": 0.32, "learning_rate": 0.0006915615906886518, "loss": 1.3153, "theoretical_loss": 3.4062246991870255, "tokens_seen": 2178547712 }, { "epoch": 0.32, "learning_rate": 0.0006912382799870675, "loss": 1.317, "theoretical_loss": 3.40615750496795, "tokens_seen": 2179072000 }, { "epoch": 0.32, "objective/train/advantage_avg": 0.002928321249783039, "objective/train/docs_used": 1234902, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.5248312950134277, "objective/train/original_loss": 2.524831771850586, "objective/train/theoretical_loss": 3.4061239156181013, "objective/train/tokens_used": 538193376, "objective/train/value_avg": -0.022308349609375, "objective/train/value_loss": 0.004614802543073893, "objective/train/value_max": -0.0008358955383300781, "objective/train/value_min": -0.59912109375, "objective/train/value_reward_corr": 0.3253018097954679, "objective/train/value_std": 0.031036376953125, "objective/train/weight_avg": 1.000315546989441, "objective/train/weighted_lm_loss": 2.5263688564300537, "objective/train/weights_max": 1.0606722831726074, "objective/train/weights_min": 0.9171360731124878, "theoretical_loss": 3.4061239156181013, "tokens_seen": 2179334144 }, { "epoch": 0.32, "learning_rate": 0.0006909149692854834, "loss": 1.3421, "theoretical_loss": 3.4060903314394873, "tokens_seen": 2179596288 }, { "epoch": 0.32, "learning_rate": 0.0006905916585838992, "loss": 1.3151, "theoretical_loss": 3.4060231785902917, "tokens_seen": 2180120576 }, { "epoch": 0.32, "learning_rate": 0.0006902683478823149, "loss": 1.3501, "theoretical_loss": 3.405956046409026, "tokens_seen": 2180644864 }, { "epoch": 0.32, "objective/train/advantage_avg": 0.011251941323280334, "objective/train/docs_used": 1235717, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.1412906646728516, "objective/train/original_loss": 2.1412904262542725, "objective/train/theoretical_loss": 3.4059140992861345, "objective/train/tokens_used": 539831776, "objective/train/value_avg": -0.0239410400390625, "objective/train/value_loss": 0.003397588385269046, "objective/train/value_max": -0.0004973411560058594, "objective/train/value_min": -0.982421875, "objective/train/value_reward_corr": 0.6776267119077362, "objective/train/value_std": 0.06658935546875, "objective/train/weight_avg": 1.001142144203186, "objective/train/weighted_lm_loss": 2.1440348625183105, "objective/train/weights_max": 1.089258074760437, "objective/train/weights_min": 0.9222906827926636, "theoretical_loss": 3.4059140992861345, "tokens_seen": 2180972544 }, { "epoch": 0.32, "learning_rate": 0.0006899450371807307, "loss": 1.3086, "theoretical_loss": 3.4058889348843624, "tokens_seen": 2181169152 }, { "epoch": 0.32, "learning_rate": 0.0006896217264791464, "loss": 1.3087, "theoretical_loss": 3.405821844004982, "tokens_seen": 2181693440 }, { "epoch": 0.32, "learning_rate": 0.0006892984157775622, "loss": 1.3395, "theoretical_loss": 3.405754773759575, "tokens_seen": 2182217728 }, { "epoch": 0.32, "objective/train/advantage_avg": 0.011597786098718643, "objective/train/docs_used": 1236933, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.646023750305176, "objective/train/original_loss": 2.6460235118865967, "objective/train/theoretical_loss": 3.4057044846097657, "objective/train/tokens_used": 541470176, "objective/train/value_avg": -0.0179443359375, "objective/train/value_loss": 0.0013678197283297777, "objective/train/value_max": -0.0008969306945800781, "objective/train/value_min": -0.460693359375, "objective/train/value_reward_corr": 0.20475553666697685, "objective/train/value_std": 0.0204010009765625, "objective/train/weight_avg": 1.001166582107544, "objective/train/weighted_lm_loss": 2.649038553237915, "objective/train/weights_max": 1.0356913805007935, "objective/train/weights_min": 0.9319064021110535, "theoretical_loss": 3.4057044846097657, "tokens_seen": 2182610944 }, { "epoch": 0.32, "learning_rate": 0.0006889751050759781, "loss": 1.3028, "theoretical_loss": 3.4056877241368397, "tokens_seen": 2182742016 }, { "epoch": 0.32, "learning_rate": 0.0006886517943743938, "loss": 1.3634, "theoretical_loss": 3.405620695125484, "tokens_seen": 2183266304 }, { "epoch": 0.32, "learning_rate": 0.0006883284836728096, "loss": 1.329, "theoretical_loss": 3.405553686714225, "tokens_seen": 2183790592 }, { "epoch": 0.32, "objective/train/advantage_avg": 0.002532712649554014, "objective/train/docs_used": 1237677, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.862225294113159, "objective/train/original_loss": 2.8622255325317383, "objective/train/theoretical_loss": 3.405495071244026, "objective/train/tokens_used": 543108576, "objective/train/value_avg": -0.023223876953125, "objective/train/value_loss": 0.007174552418291569, "objective/train/value_max": -0.0005397796630859375, "objective/train/value_min": -0.84814453125, "objective/train/value_reward_corr": 0.5704480034442844, "objective/train/value_std": 0.05267333984375, "objective/train/weight_avg": 1.0002886056900024, "objective/train/weighted_lm_loss": 2.862760066986084, "objective/train/weights_max": 1.0729271173477173, "objective/train/weights_min": 0.9173164963722229, "theoretical_loss": 3.405495071244026, "tokens_seen": 2184249344 }, { "epoch": 0.32, "learning_rate": 0.0006880051729712253, "loss": 1.289, "theoretical_loss": 3.4054866988917873, "tokens_seen": 2184314880 }, { "epoch": 0.32, "learning_rate": 0.0006876818622696411, "loss": 1.3131, "theoretical_loss": 3.405419731646906, "tokens_seen": 2184839168 }, { "epoch": 0.32, "learning_rate": 0.000687358551568057, "loss": 1.3239, "theoretical_loss": 3.4053527849683247, "tokens_seen": 2185363456 }, { "epoch": 0.32, "objective/train/advantage_avg": 0.008604628965258598, "objective/train/docs_used": 1238885, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.794611692428589, "objective/train/original_loss": 2.794611692428589, "objective/train/theoretical_loss": 3.4052858588447945, "objective/train/tokens_used": 544746976, "objective/train/value_avg": -0.0160980224609375, "objective/train/value_loss": 0.0023530758917331696, "objective/train/value_max": -0.0009965896606445312, "objective/train/value_min": -0.80615234375, "objective/train/value_reward_corr": 0.519370945637178, "objective/train/value_std": 0.0234222412109375, "objective/train/weight_avg": 1.000872015953064, "objective/train/weighted_lm_loss": 2.7974579334259033, "objective/train/weights_max": 1.02890944480896, "objective/train/weights_min": 0.9088962078094482, "theoretical_loss": 3.4052858588447945, "tokens_seen": 2185887744 }, { "epoch": 0.32, "learning_rate": 0.0006870352408664727, "loss": 1.3145, "theoretical_loss": 3.4052858588447945, "tokens_seen": 2185887744 }, { "epoch": 0.32, "learning_rate": 0.0006867119301648884, "loss": 1.3405, "theoretical_loss": 3.405218953265077, "tokens_seen": 2186412032 }, { "epoch": 0.32, "learning_rate": 0.0006863886194633042, "loss": 1.2823, "theoretical_loss": 3.405152068217942, "tokens_seen": 2186936320 }, { "epoch": 0.32, "learning_rate": 0.00068606530876172, "loss": 1.301, "theoretical_loss": 3.4050852036921686, "tokens_seen": 2187460608 }, { "epoch": 0.32, "objective/train/advantage_avg": 0.01029767282307148, "objective/train/docs_used": 1239462, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.6742913722991943, "objective/train/original_loss": 2.6742916107177734, "objective/train/theoretical_loss": 3.405076847068796, "objective/train/tokens_used": 546385376, "objective/train/value_avg": -0.05792236328125, "objective/train/value_loss": 0.009217957966029644, "objective/train/value_max": -0.0004820823669433594, "objective/train/value_min": -0.916015625, "objective/train/value_reward_corr": 0.751180919020314, "objective/train/value_std": 0.139404296875, "objective/train/weight_avg": 1.0010755062103271, "objective/train/weighted_lm_loss": 2.6747078895568848, "objective/train/weights_max": 1.050797939300537, "objective/train/weights_min": 0.9163839221000671, "theoretical_loss": 3.405076847068796, "tokens_seen": 2187526144 }, { "epoch": 0.32, "learning_rate": 0.0006857419980601359, "loss": 1.3372, "theoretical_loss": 3.405018359676543, "tokens_seen": 2187984896 }, { "epoch": 0.32, "learning_rate": 0.0006854186873585516, "loss": 1.3173, "theoretical_loss": 3.4049515361598623, "tokens_seen": 2188509184 }, { "epoch": 0.32, "learning_rate": 0.0006850953766569673, "loss": 1.313, "theoretical_loss": 3.404884733130932, "tokens_seen": 2189033472 }, { "epoch": 0.32, "objective/train/advantage_avg": 0.0017279792809858918, "objective/train/docs_used": 1240823, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.7591705322265625, "objective/train/original_loss": 2.7591700553894043, "objective/train/theoretical_loss": 3.4048680355735987, "objective/train/tokens_used": 548023776, "objective/train/value_avg": -0.023651123046875, "objective/train/value_loss": 0.007940702140331268, "objective/train/value_max": -0.0005421638488769531, "objective/train/value_min": -0.97900390625, "objective/train/value_reward_corr": 0.5869880811721474, "objective/train/value_std": 0.0550537109375, "objective/train/weight_avg": 1.0002118349075317, "objective/train/weighted_lm_loss": 2.7598280906677246, "objective/train/weights_max": 1.0742956399917603, "objective/train/weights_min": 0.9091658592224121, "theoretical_loss": 3.4048680355735987, "tokens_seen": 2189164544 }, { "epoch": 0.32, "learning_rate": 0.0006847720659553831, "loss": 1.3411, "theoretical_loss": 3.4048179505785647, "tokens_seen": 2189557760 }, { "epoch": 0.32, "learning_rate": 0.0006844487552537989, "loss": 1.3109, "theoretical_loss": 3.404751188491584, "tokens_seen": 2190082048 }, { "epoch": 0.32, "learning_rate": 0.0006841254445522148, "loss": 1.3525, "theoretical_loss": 3.4046844468588215, "tokens_seen": 2190606336 }, { "epoch": 0.32, "objective/train/advantage_avg": 0.006977139972150326, "objective/train/docs_used": 1241523, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 3.057482957839966, "objective/train/original_loss": 3.057482957839966, "objective/train/theoretical_loss": 3.40465942401761, "objective/train/tokens_used": 549662176, "objective/train/value_avg": -0.031158447265625, "objective/train/value_loss": 0.006202539894729853, "objective/train/value_max": -0.0007319450378417969, "objective/train/value_min": -0.98681640625, "objective/train/value_reward_corr": 0.6719082239678447, "objective/train/value_std": 0.0623779296875, "objective/train/weight_avg": 1.0007283687591553, "objective/train/weighted_lm_loss": 3.059574842453003, "objective/train/weights_max": 1.056964635848999, "objective/train/weights_min": 0.9065938591957092, "theoretical_loss": 3.40465942401761, "tokens_seen": 2190802944 }, { "epoch": 0.32, "learning_rate": 0.0006838021338506305, "loss": 1.33, "theoretical_loss": 3.4046177256691164, "tokens_seen": 2191130624 }, { "epoch": 0.32, "learning_rate": 0.0006834788231490463, "loss": 1.3254, "theoretical_loss": 3.4045510249113184, "tokens_seen": 2191654912 }, { "epoch": 0.32, "learning_rate": 0.000683155512447462, "loss": 1.3335, "theoretical_loss": 3.404484344574285, "tokens_seen": 2192179200 }, { "epoch": 0.32, "objective/train/advantage_avg": 0.010664316825568676, "objective/train/docs_used": 1242696, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.6571123600006104, "objective/train/original_loss": 2.6571123600006104, "objective/train/theoretical_loss": 3.404451012060076, "objective/train/tokens_used": 551300576, "objective/train/value_avg": -0.0253448486328125, "objective/train/value_loss": 0.002568309661000967, "objective/train/value_max": -0.0009737014770507812, "objective/train/value_min": -0.9599609375, "objective/train/value_reward_corr": 0.6844575427616911, "objective/train/value_std": 0.05059814453125, "objective/train/weight_avg": 1.0010792016983032, "objective/train/weighted_lm_loss": 2.6604413986206055, "objective/train/weights_max": 1.0603437423706055, "objective/train/weights_min": 0.9163856506347656, "theoretical_loss": 3.404451012060076, "tokens_seen": 2192441344 }, { "epoch": 0.32, "learning_rate": 0.0006828322017458778, "loss": 1.287, "theoretical_loss": 3.404417684646883, "tokens_seen": 2192703488 }, { "epoch": 0.32, "learning_rate": 0.0006825088910442935, "loss": 1.3175, "theoretical_loss": 3.4043510451179873, "tokens_seen": 2193227776 }, { "epoch": 0.32, "learning_rate": 0.0006821855803427094, "loss": 1.3135, "theoretical_loss": 3.4042844259764813, "tokens_seen": 2193752064 }, { "epoch": 0.32, "objective/train/advantage_avg": 0.009843279607594013, "objective/train/docs_used": 1243185, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.5798776149749756, "objective/train/original_loss": 2.579878091812134, "objective/train/theoretical_loss": 3.404242799361076, "objective/train/tokens_used": 552938976, "objective/train/value_avg": -0.01557159423828125, "objective/train/value_loss": 0.0011289735557511449, "objective/train/value_max": -0.0004172325134277344, "objective/train/value_min": -0.51318359375, "objective/train/value_reward_corr": 0.3749886232697112, "objective/train/value_std": 0.0177154541015625, "objective/train/weight_avg": 1.0009899139404297, "objective/train/weighted_lm_loss": 2.5826900005340576, "objective/train/weights_max": 1.0424177646636963, "objective/train/weights_min": 0.9107033014297485, "theoretical_loss": 3.404242799361076, "tokens_seen": 2194079744 }, { "epoch": 0.33, "learning_rate": 0.0006818622696411252, "loss": 1.332, "theoretical_loss": 3.404217827211258, "tokens_seen": 2194276352 }, { "epoch": 0.33, "learning_rate": 0.0006815389589395409, "loss": 1.3379, "theoretical_loss": 3.4041512488112193, "tokens_seen": 2194800640 }, { "epoch": 0.33, "learning_rate": 0.0006812156482379567, "loss": 1.3093, "theoretical_loss": 3.404084690765274, "tokens_seen": 2195324928 }, { "epoch": 0.33, "objective/train/advantage_avg": -0.004187243524938822, "objective/train/docs_used": 1244493, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.623321294784546, "objective/train/original_loss": 2.623321056365967, "objective/train/theoretical_loss": 3.404034785581523, "objective/train/tokens_used": 554577376, "objective/train/value_avg": -0.02197265625, "objective/train/value_loss": 0.00669842679053545, "objective/train/value_max": -0.0004124641418457031, "objective/train/value_min": -0.98681640625, "objective/train/value_reward_corr": 0.49020300244854675, "objective/train/value_std": 0.037506103515625, "objective/train/weight_avg": 0.9996143579483032, "objective/train/weighted_lm_loss": 2.6246426105499268, "objective/train/weights_max": 1.0409655570983887, "objective/train/weights_min": 0.9070945382118225, "theoretical_loss": 3.404034785581523, "tokens_seen": 2195718144 }, { "epoch": 0.33, "learning_rate": 0.0006808923375363724, "loss": 1.3449, "theoretical_loss": 3.4040181530623417, "tokens_seen": 2195849216 }, { "epoch": 0.33, "learning_rate": 0.0006805690268347883, "loss": 1.3656, "theoretical_loss": 3.4039516356913495, "tokens_seen": 2196373504 }, { "epoch": 0.33, "learning_rate": 0.0006802457161332041, "loss": 1.328, "theoretical_loss": 3.4038851386412334, "tokens_seen": 2196897792 }, { "epoch": 0.33, "objective/train/advantage_avg": 0.006207495927810669, "objective/train/docs_used": 1244898, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.5669567584991455, "objective/train/original_loss": 2.5669567584991455, "objective/train/theoretical_loss": 3.403826970383159, "objective/train/tokens_used": 556215776, "objective/train/value_avg": -0.01383209228515625, "objective/train/value_loss": 0.001737378304824233, "objective/train/value_max": -0.00038743019104003906, "objective/train/value_min": -0.64794921875, "objective/train/value_reward_corr": 0.5001565131263025, "objective/train/value_std": 0.018798828125, "objective/train/weight_avg": 1.0006293058395386, "objective/train/weighted_lm_loss": 2.5688748359680176, "objective/train/weights_max": 1.0610454082489014, "objective/train/weights_min": 0.9150390028953552, "theoretical_loss": 3.403826970383159, "tokens_seen": 2197356544 }, { "epoch": 0.33, "learning_rate": 0.0006799224054316197, "loss": 1.3443, "theoretical_loss": 3.403818661900938, "tokens_seen": 2197422080 }, { "epoch": 0.33, "learning_rate": 0.0006795990947300356, "loss": 1.348, "theoretical_loss": 3.4037522054594165, "tokens_seen": 2197946368 }, { "epoch": 0.33, "learning_rate": 0.0006792757840284513, "loss": 1.3365, "theoretical_loss": 3.403685769305631, "tokens_seen": 2198470656 }, { "epoch": 0.33, "objective/train/advantage_avg": -0.015270376577973366, "objective/train/docs_used": 1246280, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.3329825401306152, "objective/train/original_loss": 2.3329825401306152, "objective/train/theoretical_loss": 3.403619353428553, "objective/train/tokens_used": 557854176, "objective/train/value_avg": -0.01953125, "objective/train/value_loss": 0.00859568826854229, "objective/train/value_max": -0.0009546279907226562, "objective/train/value_min": -0.83203125, "objective/train/value_reward_corr": 0.4563293805254058, "objective/train/value_std": 0.0284881591796875, "objective/train/weight_avg": 0.998515248298645, "objective/train/weighted_lm_loss": 2.330075740814209, "objective/train/weights_max": 1.024061918258667, "objective/train/weights_min": 0.9137488007545471, "theoretical_loss": 3.403619353428553, "tokens_seen": 2198994944 }, { "epoch": 0.33, "learning_rate": 0.0006789524733268672, "loss": 1.3291, "theoretical_loss": 3.403619353428553, "tokens_seen": 2198994944 }, { "epoch": 0.33, "learning_rate": 0.000678629162625283, "loss": 1.3753, "theoretical_loss": 3.40355295781716, "tokens_seen": 2199519232 }, { "epoch": 0.33, "learning_rate": 0.0006783058519236986, "loss": 1.3353, "theoretical_loss": 3.4034865824604417, "tokens_seen": 2200043520 }, { "epoch": 0.33, "learning_rate": 0.0006779825412221145, "loss": 1.3387, "theoretical_loss": 3.403420227347393, "tokens_seen": 2200567808 }, { "epoch": 0.33, "objective/train/advantage_avg": 0.010404769331216812, "objective/train/docs_used": 1246987, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.7070045471191406, "objective/train/original_loss": 2.7070043087005615, "objective/train/theoretical_loss": 3.4034119343810976, "objective/train/tokens_used": 559492576, "objective/train/value_avg": -0.0164031982421875, "objective/train/value_loss": 0.002098935190588236, "objective/train/value_max": -0.0005679130554199219, "objective/train/value_min": -0.99560546875, "objective/train/value_reward_corr": 0.5897961768348421, "objective/train/value_std": 0.035125732421875, "objective/train/weight_avg": 1.0010508298873901, "objective/train/weighted_lm_loss": 2.710245132446289, "objective/train/weights_max": 1.0653777122497559, "objective/train/weights_min": 0.9067119359970093, "theoretical_loss": 3.4034119343810976, "tokens_seen": 2200633344 }, { "epoch": 0.33, "learning_rate": 0.0006776592305205302, "loss": 1.3425, "theoretical_loss": 3.40335389246702, "tokens_seen": 2201092096 }, { "epoch": 0.33, "learning_rate": 0.000677335919818946, "loss": 1.3163, "theoretical_loss": 3.4032875778083356, "tokens_seen": 2201616384 }, { "epoch": 0.33, "learning_rate": 0.0006770126091173619, "loss": 1.3133, "theoretical_loss": 3.4032212833603626, "tokens_seen": 2202140672 }, { "epoch": 0.33, "objective/train/advantage_avg": 0.008753268048167229, "objective/train/docs_used": 1247624, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.5258305072784424, "objective/train/original_loss": 2.525830030441284, "objective/train/theoretical_loss": 3.4032047129050076, "objective/train/tokens_used": 561130976, "objective/train/value_avg": -0.0181427001953125, "objective/train/value_loss": 0.002055296441540122, "objective/train/value_max": -0.0007152557373046875, "objective/train/value_min": -0.81298828125, "objective/train/value_reward_corr": 0.5742153203483045, "objective/train/value_std": 0.034454345703125, "objective/train/weight_avg": 1.0008854866027832, "objective/train/weighted_lm_loss": 2.529000759124756, "objective/train/weights_max": 1.0484836101531982, "objective/train/weights_min": 0.9297380447387695, "theoretical_loss": 3.4032047129050076, "tokens_seen": 2202271744 }, { "epoch": 0.33, "learning_rate": 0.0006766892984157775, "loss": 1.3553, "theoretical_loss": 3.4031550091121314, "tokens_seen": 2202664960 }, { "epoch": 0.33, "learning_rate": 0.0006763659877141934, "loss": 1.3554, "theoretical_loss": 3.4030887550526816, "tokens_seen": 2203189248 }, { "epoch": 0.33, "learning_rate": 0.0006760426770126091, "loss": 1.3648, "theoretical_loss": 3.4030225211710614, "tokens_seen": 2203713536 }, { "epoch": 0.33, "objective/train/advantage_avg": 0.008187931962311268, "objective/train/docs_used": 1248832, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.734811305999756, "objective/train/original_loss": 2.734811305999756, "objective/train/theoretical_loss": 3.4029976886653164, "objective/train/tokens_used": 562769376, "objective/train/value_avg": -0.0259857177734375, "objective/train/value_loss": 0.0020124150905758142, "objective/train/value_max": -0.000537872314453125, "objective/train/value_min": -0.9482421875, "objective/train/value_reward_corr": 0.6216676894197916, "objective/train/value_std": 0.035369873046875, "objective/train/weight_avg": 1.0008288621902466, "objective/train/weighted_lm_loss": 2.7379369735717773, "objective/train/weights_max": 1.049598217010498, "objective/train/weights_min": 0.9196120500564575, "theoretical_loss": 3.4029976886653164, "tokens_seen": 2203910144 }, { "epoch": 0.33, "learning_rate": 0.0006757193663110249, "loss": 1.2937, "theoretical_loss": 3.4029563074563267, "tokens_seen": 2204237824 }, { "epoch": 0.33, "learning_rate": 0.0006753960556094408, "loss": 1.304, "theoretical_loss": 3.4028901138975423, "tokens_seen": 2204762112 }, { "epoch": 0.33, "learning_rate": 0.0006750727449078564, "loss": 1.3207, "theoretical_loss": 3.4028239404837826, "tokens_seen": 2205286400 }, { "epoch": 0.33, "objective/train/advantage_avg": 0.005084687378257513, "objective/train/docs_used": 1249474, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.4253501892089844, "objective/train/original_loss": 2.4253504276275635, "objective/train/theoretical_loss": 3.4027908613278743, "objective/train/tokens_used": 564407776, "objective/train/value_avg": -0.01267242431640625, "objective/train/value_loss": 0.002932481002062559, "objective/train/value_max": -0.0005660057067871094, "objective/train/value_min": -0.96435546875, "objective/train/value_reward_corr": 0.3787190362800672, "objective/train/value_std": 0.01812744140625, "objective/train/weight_avg": 1.0005227327346802, "objective/train/weighted_lm_loss": 2.4269416332244873, "objective/train/weights_max": 1.0179986953735352, "objective/train/weights_min": 0.909927487373352, "theoretical_loss": 3.4027908613278743, "tokens_seen": 2205548544 }, { "epoch": 0.33, "learning_rate": 0.0006747494342062723, "loss": 1.3209, "theoretical_loss": 3.402757787204129, "tokens_seen": 2205810688 }, { "epoch": 0.33, "learning_rate": 0.000674426123504688, "loss": 1.313, "theoretical_loss": 3.402691654047672, "tokens_seen": 2206334976 }, { "epoch": 0.33, "learning_rate": 0.0006741028128031038, "loss": 1.2943, "theoretical_loss": 3.4026255410035113, "tokens_seen": 2206859264 }, { "epoch": 0.33, "objective/train/advantage_avg": 0.001866757171228528, "objective/train/docs_used": 1250567, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.8147780895233154, "objective/train/original_loss": 2.8147776126861572, "objective/train/theoretical_loss": 3.4025842305593454, "objective/train/tokens_used": 566046176, "objective/train/value_avg": -0.0299530029296875, "objective/train/value_loss": 0.0112279849126935, "objective/train/value_max": -0.0005793571472167969, "objective/train/value_min": -0.97900390625, "objective/train/value_reward_corr": 0.5360419631110088, "objective/train/value_std": 0.06549072265625, "objective/train/weight_avg": 1.0002416372299194, "objective/train/weighted_lm_loss": 2.815140962600708, "objective/train/weights_max": 1.066742181777954, "objective/train/weights_min": 0.9070262312889099, "theoretical_loss": 3.4025842305593454, "tokens_seen": 2207186944 }, { "epoch": 0.33, "learning_rate": 0.0006737795021015197, "loss": 1.3217, "theoretical_loss": 3.4025594480607535, "tokens_seen": 2207383552 }, { "epoch": 0.33, "learning_rate": 0.0006734561913999353, "loss": 1.3195, "theoretical_loss": 3.4024933752085156, "tokens_seen": 2207907840 }, { "epoch": 0.33, "learning_rate": 0.0006731328806983511, "loss": 1.3078, "theoretical_loss": 3.4024273224359214, "tokens_seen": 2208432128 }, { "epoch": 0.33, "objective/train/advantage_avg": 0.00922344159334898, "objective/train/docs_used": 1251703, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.999570369720459, "objective/train/original_loss": 2.999569892883301, "objective/train/theoretical_loss": 3.4023777960272046, "objective/train/tokens_used": 567684576, "objective/train/value_avg": -0.01397705078125, "objective/train/value_loss": 0.0009906524792313576, "objective/train/value_max": -0.00075531005859375, "objective/train/value_min": -0.521484375, "objective/train/value_reward_corr": 0.1976816516344142, "objective/train/value_std": 0.0157012939453125, "objective/train/weight_avg": 1.0009273290634155, "objective/train/weighted_lm_loss": 3.002964973449707, "objective/train/weights_max": 1.0408819913864136, "objective/train/weights_min": 0.9594298601150513, "theoretical_loss": 3.4023777960272046, "tokens_seen": 2208825344 }, { "epoch": 0.33, "learning_rate": 0.0006728095699967669, "loss": 1.3515, "theoretical_loss": 3.402361289732104, "tokens_seen": 2208956416 }, { "epoch": 0.33, "learning_rate": 0.0006724862592951827, "loss": 1.3257, "theoretical_loss": 3.402295277086205, "tokens_seen": 2209480704 }, { "epoch": 0.33, "learning_rate": 0.0006721629485935985, "loss": 1.3251, "theoretical_loss": 3.4022292844873743, "tokens_seen": 2210004992 }, { "epoch": 0.33, "objective/train/advantage_avg": 0.010845446959137917, "objective/train/docs_used": 1252332, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.214496374130249, "objective/train/original_loss": 2.214495897293091, "objective/train/theoretical_loss": 3.402171557399735, "objective/train/tokens_used": 569322976, "objective/train/value_avg": -0.015350341796875, "objective/train/value_loss": 0.0008034831844270229, "objective/train/value_max": -0.0006589889526367188, "objective/train/value_min": -0.2022705078125, "objective/train/value_reward_corr": 0.1519557910683194, "objective/train/value_std": 0.0166778564453125, "objective/train/weight_avg": 1.0010885000228882, "objective/train/weighted_lm_loss": 2.216721296310425, "objective/train/weights_max": 1.0188052654266357, "objective/train/weights_min": 0.9350088834762573, "theoretical_loss": 3.402171557399735, "tokens_seen": 2210463744 }, { "epoch": 0.34, "learning_rate": 0.0006718396378920142, "loss": 1.313, "theoretical_loss": 3.4021633119247703, "tokens_seen": 2210529280 }, { "epoch": 0.34, "learning_rate": 0.00067151632719043, "loss": 1.334, "theoretical_loss": 3.402097359387559, "tokens_seen": 2211053568 }, { "epoch": 0.34, "learning_rate": 0.0006711930164888458, "loss": 1.2934, "theoretical_loss": 3.402031426864917, "tokens_seen": 2211577856 }, { "epoch": 0.34, "objective/train/advantage_avg": 0.007060572970658541, "objective/train/docs_used": 1253043, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.7764954566955566, "objective/train/original_loss": 2.7764952182769775, "objective/train/theoretical_loss": 3.401965514346027, "objective/train/tokens_used": 570961376, "objective/train/value_avg": -0.0196075439453125, "objective/train/value_loss": 0.003126104362308979, "objective/train/value_max": -0.0003981590270996094, "objective/train/value_min": -0.99169921875, "objective/train/value_reward_corr": 0.6737336587690154, "objective/train/value_std": 0.05120849609375, "objective/train/weight_avg": 1.0007215738296509, "objective/train/weighted_lm_loss": 2.7783308029174805, "objective/train/weights_max": 1.064166784286499, "objective/train/weights_min": 0.9184340834617615, "theoretical_loss": 3.401965514346027, "tokens_seen": 2212102144 }, { "epoch": 0.34, "learning_rate": 0.0006708697057872616, "loss": 1.2993, "theoretical_loss": 3.401965514346027, "tokens_seen": 2212102144 }, { "epoch": 0.34, "learning_rate": 0.0006705463950856773, "loss": 1.3004, "theoretical_loss": 3.4018996218200805, "tokens_seen": 2212626432 }, { "epoch": 0.34, "learning_rate": 0.0006702230843840931, "loss": 1.2893, "theoretical_loss": 3.401833749276279, "tokens_seen": 2213150720 }, { "epoch": 0.34, "learning_rate": 0.0006698997736825089, "loss": 1.347, "theoretical_loss": 3.4017678967038307, "tokens_seen": 2213675008 }, { "epoch": 0.34, "objective/train/advantage_avg": 0.007430842146277428, "objective/train/docs_used": 1254214, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.5923242568969727, "objective/train/original_loss": 2.5923242568969727, "objective/train/theoretical_loss": 3.4017596665359733, "objective/train/tokens_used": 572599776, "objective/train/value_avg": -0.024871826171875, "objective/train/value_loss": 0.0022759896237403154, "objective/train/value_max": -0.0007762908935546875, "objective/train/value_min": -0.984375, "objective/train/value_reward_corr": 0.8253177680540282, "objective/train/value_std": 0.0626220703125, "objective/train/weight_avg": 1.0007542371749878, "objective/train/weighted_lm_loss": 2.5961074829101562, "objective/train/weights_max": 1.0502697229385376, "objective/train/weights_min": 0.9136180281639099, "theoretical_loss": 3.4017596665359733, "tokens_seen": 2213740544 }, { "epoch": 0.34, "learning_rate": 0.0006695764629809247, "loss": 1.2776, "theoretical_loss": 3.4017020640919524, "tokens_seen": 2214199296 }, { "epoch": 0.34, "learning_rate": 0.0006692531522793405, "loss": 1.3157, "theoretical_loss": 3.4016362514298706, "tokens_seen": 2214723584 }, { "epoch": 0.34, "learning_rate": 0.0006689298415777562, "loss": 1.2832, "theoretical_loss": 3.4015704587068187, "tokens_seen": 2215247872 }, { "epoch": 0.34, "objective/train/advantage_avg": 0.005499918945133686, "objective/train/docs_used": 1254739, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.9984145164489746, "objective/train/original_loss": 2.9984140396118164, "objective/train/theoretical_loss": 3.4015540136402684, "objective/train/tokens_used": 574238176, "objective/train/value_avg": -0.01445770263671875, "objective/train/value_loss": 0.0024031433276832104, "objective/train/value_max": -0.0007734298706054688, "objective/train/value_min": -0.425537109375, "objective/train/value_reward_corr": 0.39661132543310673, "objective/train/value_std": 0.016510009765625, "objective/train/weight_avg": 1.0005618333816528, "objective/train/weighted_lm_loss": 3.0005881786346436, "objective/train/weights_max": 1.0430870056152344, "objective/train/weights_min": 0.9334611892700195, "theoretical_loss": 3.4015540136402684, "tokens_seen": 2215378944 }, { "epoch": 0.34, "learning_rate": 0.0006686065308761721, "loss": 1.3364, "theoretical_loss": 3.401504685912039, "tokens_seen": 2215772160 }, { "epoch": 0.34, "learning_rate": 0.0006682832201745877, "loss": 1.2685, "theoretical_loss": 3.4014389330347816, "tokens_seen": 2216296448 }, { "epoch": 0.34, "learning_rate": 0.0006679599094730035, "loss": 1.2605, "theoretical_loss": 3.4013732000643064, "tokens_seen": 2216820736 }, { "epoch": 0.34, "objective/train/advantage_avg": 0.005679369904100895, "objective/train/docs_used": 1256282, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.576488494873047, "objective/train/original_loss": 2.576488971710205, "objective/train/theoretical_loss": 3.4013485553304044, "objective/train/tokens_used": 575876576, "objective/train/value_avg": -0.0162200927734375, "objective/train/value_loss": 0.0011638423893600702, "objective/train/value_max": -0.001094818115234375, "objective/train/value_min": -0.413330078125, "objective/train/value_reward_corr": 0.40076835629125174, "objective/train/value_std": 0.01409912109375, "objective/train/weight_avg": 1.0005736351013184, "objective/train/weighted_lm_loss": 2.579195022583008, "objective/train/weights_max": 1.0374401807785034, "objective/train/weights_min": 0.9215030670166016, "theoretical_loss": 3.4013485553304044, "tokens_seen": 2217017344 }, { "epoch": 0.34, "learning_rate": 0.0006676365987714194, "loss": 1.315, "theoretical_loss": 3.4013074869898805, "tokens_seen": 2217345024 }, { "epoch": 0.34, "learning_rate": 0.0006673132880698351, "loss": 1.3105, "theoretical_loss": 3.4012417938007786, "tokens_seen": 2217869312 }, { "epoch": 0.34, "learning_rate": 0.000666989977368251, "loss": 1.3023, "theoretical_loss": 3.401176120486286, "tokens_seen": 2218393600 }, { "epoch": 0.34, "objective/train/advantage_avg": 0.004325104411691427, "objective/train/docs_used": 1256744, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.612881660461426, "objective/train/original_loss": 2.6128809452056885, "objective/train/theoretical_loss": 3.4011432912786708, "objective/train/tokens_used": 577514976, "objective/train/value_avg": -0.018218994140625, "objective/train/value_loss": 0.002904750406742096, "objective/train/value_max": -0.0006771087646484375, "objective/train/value_min": -0.9873046875, "objective/train/value_reward_corr": 0.499280575094786, "objective/train/value_std": 0.030792236328125, "objective/train/weight_avg": 1.0004467964172363, "objective/train/weighted_lm_loss": 2.6146204471588135, "objective/train/weights_max": 1.0982160568237305, "objective/train/weights_min": 0.9121901988983154, "theoretical_loss": 3.4011432912786708, "tokens_seen": 2218655744 }, { "epoch": 0.34, "learning_rate": 0.0006666666666666666, "loss": 1.3068, "theoretical_loss": 3.4011104670356938, "tokens_seen": 2218917888 }, { "epoch": 0.34, "learning_rate": 0.0006663433559650824, "loss": 1.3184, "theoretical_loss": 3.4010448334383026, "tokens_seen": 2219442176 }, { "epoch": 0.34, "learning_rate": 0.0006660200452634983, "loss": 1.3519, "theoretical_loss": 3.400979219683422, "tokens_seen": 2219966464 }, { "epoch": 0.34, "objective/train/advantage_avg": 0.0034673018380999565, "objective/train/docs_used": 1258010, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.216805934906006, "objective/train/original_loss": 2.2168054580688477, "objective/train/theoretical_loss": 3.400938221158149, "objective/train/tokens_used": 579153376, "objective/train/value_avg": -0.0162200927734375, "objective/train/value_loss": 0.0009902478195726871, "objective/train/value_max": -0.0004935264587402344, "objective/train/value_min": -0.7607421875, "objective/train/value_reward_corr": 0.520891772561427, "objective/train/value_std": 0.017578125, "objective/train/weight_avg": 1.0003516674041748, "objective/train/weighted_lm_loss": 2.2186079025268555, "objective/train/weights_max": 1.018893837928772, "objective/train/weights_min": 0.9257304072380066, "theoretical_loss": 3.400938221158149, "tokens_seen": 2220294144 }, { "epoch": 0.34, "learning_rate": 0.000665696734561914, "loss": 1.3353, "theoretical_loss": 3.4009136257603685, "tokens_seen": 2220490752 }, { "epoch": 0.34, "learning_rate": 0.0006653734238603299, "loss": 1.3037, "theoretical_loss": 3.4008480516584676, "tokens_seen": 2221015040 }, { "epoch": 0.34, "learning_rate": 0.0006650501131587455, "loss": 1.2974, "theoretical_loss": 3.4007824973670533, "tokens_seen": 2221539328 }, { "epoch": 0.34, "objective/train/advantage_avg": 0.013533786870539188, "objective/train/docs_used": 1258716, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.6383533477783203, "objective/train/original_loss": 2.6383535861968994, "objective/train/theoretical_loss": 3.400733344642712, "objective/train/tokens_used": 580791776, "objective/train/value_avg": -0.021453857421875, "objective/train/value_loss": 0.0025733059737831354, "objective/train/value_max": -0.0009365081787109375, "objective/train/value_min": -0.98974609375, "objective/train/value_reward_corr": 0.46869101194408064, "objective/train/value_std": 0.031890869140625, "objective/train/weight_avg": 1.0013660192489624, "objective/train/weighted_lm_loss": 2.6423094272613525, "objective/train/weights_max": 1.0652347803115845, "objective/train/weights_min": 0.9100506901741028, "theoretical_loss": 3.400733344642712, "tokens_seen": 2221932544 }, { "epoch": 0.34, "learning_rate": 0.0006647268024571613, "loss": 1.2726, "theoretical_loss": 3.4007169628754665, "tokens_seen": 2222063616 }, { "epoch": 0.34, "learning_rate": 0.0006644034917555772, "loss": 1.3232, "theoretical_loss": 3.4006514481730585, "tokens_seen": 2222587904 }, { "epoch": 0.34, "learning_rate": 0.0006640801810539929, "loss": 1.3198, "theoretical_loss": 3.400585953249187, "tokens_seen": 2223112192 }, { "epoch": 0.34, "objective/train/advantage_avg": -0.005674213636666536, "objective/train/docs_used": 1259970, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.622377634048462, "objective/train/original_loss": 2.622377634048462, "objective/train/theoretical_loss": 3.4005286614070207, "objective/train/tokens_used": 582430176, "objective/train/value_avg": -0.019744873046875, "objective/train/value_loss": 0.008090010844171047, "objective/train/value_max": -0.0005335807800292969, "objective/train/value_min": -0.6123046875, "objective/train/value_reward_corr": 0.551975019555091, "objective/train/value_std": 0.031494140625, "objective/train/weight_avg": 0.999472439289093, "objective/train/weighted_lm_loss": 2.620776414871216, "objective/train/weights_max": 1.035191297531128, "objective/train/weights_min": 0.9454022645950317, "theoretical_loss": 3.4005286614070207, "tokens_seen": 2223570944 }, { "epoch": 0.34, "learning_rate": 0.0006637568703524087, "loss": 1.3504, "theoretical_loss": 3.4005204780932186, "tokens_seen": 2223636480 }, { "epoch": 0.34, "learning_rate": 0.0006634335596508244, "loss": 1.2982, "theoretical_loss": 3.4004550226945285, "tokens_seen": 2224160768 }, { "epoch": 0.34, "learning_rate": 0.0006631102489492402, "loss": 1.2609, "theoretical_loss": 3.4003895870424996, "tokens_seen": 2224685056 }, { "epoch": 0.34, "objective/train/advantage_avg": 0.0101103400811553, "objective/train/docs_used": 1260594, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.717944860458374, "objective/train/original_loss": 2.717944622039795, "objective/train/theoretical_loss": 3.4003241711265226, "objective/train/tokens_used": 584068576, "objective/train/value_avg": -0.0130767822265625, "objective/train/value_loss": 0.00041317936847917736, "objective/train/value_max": -0.000583648681640625, "objective/train/value_min": -0.331787109375, "objective/train/value_reward_corr": 0.23382901875920223, "objective/train/value_std": 0.01172637939453125, "objective/train/weight_avg": 1.0010130405426025, "objective/train/weighted_lm_loss": 2.720815420150757, "objective/train/weights_max": 1.0293793678283691, "objective/train/weights_min": 0.9742960929870605, "theoretical_loss": 3.4003241711265226, "tokens_seen": 2225209344 }, { "epoch": 0.34, "learning_rate": 0.0006627869382476561, "loss": 1.3242, "theoretical_loss": 3.4003241711265226, "tokens_seen": 2225209344 }, { "epoch": 0.34, "learning_rate": 0.0006624636275460718, "loss": 1.3159, "theoretical_loss": 3.4002587749359976, "tokens_seen": 2225733632 }, { "epoch": 0.34, "learning_rate": 0.0006621403168444876, "loss": 1.2867, "theoretical_loss": 3.400193398460332, "tokens_seen": 2226257920 }, { "epoch": 0.34, "learning_rate": 0.0006618170061429033, "loss": 1.3066, "theoretical_loss": 3.4001280416889417, "tokens_seen": 2226782208 }, { "epoch": 0.34, "objective/train/advantage_avg": 0.006850486155599356, "objective/train/docs_used": 1261463, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.6799166202545166, "objective/train/original_loss": 2.6799166202545166, "objective/train/theoretical_loss": 3.4001198734774483, "objective/train/tokens_used": 585706976, "objective/train/value_avg": -0.029144287109375, "objective/train/value_loss": 0.005340535659343004, "objective/train/value_max": -0.0004916191101074219, "objective/train/value_min": -0.986328125, "objective/train/value_reward_corr": 0.5811637084533804, "objective/train/value_std": 0.055999755859375, "objective/train/weight_avg": 1.000711441040039, "objective/train/weighted_lm_loss": 2.6821703910827637, "objective/train/weights_max": 1.0609620809555054, "objective/train/weights_min": 0.9164710640907288, "theoretical_loss": 3.4001198734774483, "tokens_seen": 2226847744 }, { "epoch": 0.35, "learning_rate": 0.0006614936954413191, "loss": 1.308, "theoretical_loss": 3.400062704611251, "tokens_seen": 2227306496 }, { "epoch": 0.35, "learning_rate": 0.0006611703847397348, "loss": 1.2269, "theoretical_loss": 3.399997387216691, "tokens_seen": 2227830784 }, { "epoch": 0.35, "learning_rate": 0.0006608470740381507, "loss": 1.2863, "theoretical_loss": 3.399932089494703, "tokens_seen": 2228355072 }, { "epoch": 0.35, "objective/train/advantage_avg": -0.01019098237156868, "objective/train/docs_used": 1262743, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.280136823654175, "objective/train/original_loss": 2.2801365852355957, "objective/train/theoretical_loss": 3.399915768136809, "objective/train/tokens_used": 587345376, "objective/train/value_avg": -0.0269317626953125, "objective/train/value_loss": 0.004036659374833107, "objective/train/value_max": -0.0007042884826660156, "objective/train/value_min": -0.427734375, "objective/train/value_reward_corr": 0.8619867219519387, "objective/train/value_std": 0.0501708984375, "objective/train/weight_avg": 0.9990009665489197, "objective/train/weighted_lm_loss": 2.2811102867126465, "objective/train/weights_max": 1.0172706842422485, "objective/train/weights_min": 0.9623818397521973, "theoretical_loss": 3.399915768136809, "tokens_seen": 2228486144 }, { "epoch": 0.35, "learning_rate": 0.0006605237633365665, "loss": 1.3041, "theoretical_loss": 3.3998668114347352, "tokens_seen": 2228879360 }, { "epoch": 0.35, "learning_rate": 0.0006602004526349822, "loss": 1.3146, "theoretical_loss": 3.399801553026244, "tokens_seen": 2229403648 }, { "epoch": 0.35, "learning_rate": 0.000659877141933398, "loss": 1.2968, "theoretical_loss": 3.3997363142586945, "tokens_seen": 2229927936 }, { "epoch": 0.35, "objective/train/advantage_avg": 0.001986222807317972, "objective/train/docs_used": 1263995, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.59847354888916, "objective/train/original_loss": 2.5984740257263184, "objective/train/theoretical_loss": 3.3997118547823946, "objective/train/tokens_used": 588983776, "objective/train/value_avg": -0.041229248046875, "objective/train/value_loss": 0.013701432384550571, "objective/train/value_max": -0.00046372413635253906, "objective/train/value_min": -0.99072265625, "objective/train/value_reward_corr": 0.6564708366539564, "objective/train/value_std": 0.1092529296875, "objective/train/weight_avg": 1.0002663135528564, "objective/train/weighted_lm_loss": 2.6001150608062744, "objective/train/weights_max": 1.0999422073364258, "objective/train/weights_min": 0.9068701863288879, "theoretical_loss": 3.3997118547823946, "tokens_seen": 2230124544 }, { "epoch": 0.35, "learning_rate": 0.0006595538312318137, "loss": 1.3095, "theoretical_loss": 3.399671095121559, "tokens_seen": 2230452224 }, { "epoch": 0.35, "learning_rate": 0.0006592305205302296, "loss": 1.3061, "theoretical_loss": 3.399605895604319, "tokens_seen": 2230976512 }, { "epoch": 0.35, "learning_rate": 0.0006589072098286454, "loss": 1.2713, "theoretical_loss": 3.399540715696463, "tokens_seen": 2231500800 }, { "epoch": 0.35, "objective/train/advantage_avg": 0.002642744919285178, "objective/train/docs_used": 1264337, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.308987617492676, "objective/train/original_loss": 2.308987617492676, "objective/train/theoretical_loss": 3.399508133092772, "objective/train/tokens_used": 590622176, "objective/train/value_avg": -0.0189056396484375, "objective/train/value_loss": 0.000979469739831984, "objective/train/value_max": -0.0005660057067871094, "objective/train/value_min": -0.17138671875, "objective/train/value_reward_corr": 0.6870191211238668, "objective/train/value_std": 0.0193634033203125, "objective/train/weight_avg": 1.0002691745758057, "objective/train/weighted_lm_loss": 2.310965061187744, "objective/train/weights_max": 1.0118216276168823, "objective/train/weights_min": 0.9652969241142273, "theoretical_loss": 3.399508133092772, "tokens_seen": 2231762944 }, { "epoch": 0.35, "learning_rate": 0.000658583899127061, "loss": 1.3032, "theoretical_loss": 3.3994755553874887, "tokens_seen": 2232025088 }, { "epoch": 0.35, "learning_rate": 0.0006582605884254769, "loss": 1.3222, "theoretical_loss": 3.3994104146669013, "tokens_seen": 2232549376 }, { "epoch": 0.35, "learning_rate": 0.0006579372777238926, "loss": 1.3356, "theoretical_loss": 3.399345293524214, "tokens_seen": 2233073664 }, { "epoch": 0.35, "objective/train/advantage_avg": 0.009893990121781826, "objective/train/docs_used": 1265500, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.3748128414154053, "objective/train/original_loss": 2.3748130798339844, "objective/train/theoretical_loss": 3.3993046027472804, "objective/train/tokens_used": 592260576, "objective/train/value_avg": -0.0208587646484375, "objective/train/value_loss": 0.0008422697428613901, "objective/train/value_max": -0.0007615089416503906, "objective/train/value_min": -0.2401123046875, "objective/train/value_reward_corr": 0.5346227369567266, "objective/train/value_std": 0.021759033203125, "objective/train/weight_avg": 1.0009936094284058, "objective/train/weighted_lm_loss": 2.3772010803222656, "objective/train/weights_max": 1.0167429447174072, "objective/train/weights_min": 0.9635552763938904, "theoretical_loss": 3.3993046027472804, "tokens_seen": 2233401344 }, { "epoch": 0.35, "learning_rate": 0.0006576139670223085, "loss": 1.3125, "theoretical_loss": 3.399280191948948, "tokens_seen": 2233597952 }, { "epoch": 0.35, "learning_rate": 0.0006572906563207243, "loss": 1.3419, "theoretical_loss": 3.3992151099306334, "tokens_seen": 2234122240 }, { "epoch": 0.35, "learning_rate": 0.0006569673456191399, "loss": 1.3506, "theoretical_loss": 3.399150047458807, "tokens_seen": 2234646528 }, { "epoch": 0.35, "objective/train/advantage_avg": -0.0055469307117164135, "objective/train/docs_used": 1266280, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.3155112266540527, "objective/train/original_loss": 2.3155107498168945, "objective/train/theoretical_loss": 3.399101263426031, "objective/train/tokens_used": 593898976, "objective/train/value_avg": -0.0136566162109375, "objective/train/value_loss": 0.003256086027249694, "objective/train/value_max": -0.0002758502960205078, "objective/train/value_min": -0.7265625, "objective/train/value_reward_corr": 0.5600857402621211, "objective/train/value_std": 0.023223876953125, "objective/train/weight_avg": 0.9994613528251648, "objective/train/weighted_lm_loss": 2.315974712371826, "objective/train/weights_max": 1.0321367979049683, "objective/train/weights_min": 0.9169082641601562, "theoretical_loss": 3.399101263426031, "tokens_seen": 2235039744 }, { "epoch": 0.35, "learning_rate": 0.0006566440349175558, "loss": 1.2893, "theoretical_loss": 3.3990850045230148, "tokens_seen": 2235170816 }, { "epoch": 0.35, "learning_rate": 0.0006563207242159715, "loss": 1.3353, "theoretical_loss": 3.3990199811128106, "tokens_seen": 2235695104 }, { "epoch": 0.35, "learning_rate": 0.0006559974135143874, "loss": 1.3578, "theoretical_loss": 3.3989549772177554, "tokens_seen": 2236219392 }, { "epoch": 0.35, "objective/train/advantage_avg": 0.004243308212608099, "objective/train/docs_used": 1267649, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.8306519985198975, "objective/train/original_loss": 2.8306522369384766, "objective/train/theoretical_loss": 3.3988981148099033, "objective/train/tokens_used": 595537376, "objective/train/value_avg": -0.01983642578125, "objective/train/value_loss": 0.002549584023654461, "objective/train/value_max": -0.0005273818969726562, "objective/train/value_min": -0.9697265625, "objective/train/value_reward_corr": 0.463827295925105, "objective/train/value_std": 0.03179931640625, "objective/train/weight_avg": 1.0004370212554932, "objective/train/weighted_lm_loss": 2.8331143856048584, "objective/train/weights_max": 1.0803472995758057, "objective/train/weights_min": 0.9076674580574036, "theoretical_loss": 3.3988981148099033, "tokens_seen": 2236678144 }, { "epoch": 0.35, "learning_rate": 0.0006556741028128032, "loss": 1.3705, "theoretical_loss": 3.3988899928274194, "tokens_seen": 2236743680 }, { "epoch": 0.35, "learning_rate": 0.0006553507921112188, "loss": 1.3099, "theoretical_loss": 3.3988250279313803, "tokens_seen": 2237267968 }, { "epoch": 0.35, "learning_rate": 0.0006550274814096347, "loss": 1.3501, "theoretical_loss": 3.3987600825192237, "tokens_seen": 2237792256 }, { "epoch": 0.35, "objective/train/advantage_avg": 0.002954860683530569, "objective/train/docs_used": 1268187, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.3471202850341797, "objective/train/original_loss": 2.3471202850341797, "objective/train/theoretical_loss": 3.3986951565805437, "objective/train/tokens_used": 597175776, "objective/train/value_avg": -0.0171966552734375, "objective/train/value_loss": 0.002441596472635865, "objective/train/value_max": -0.000583648681640625, "objective/train/value_min": -0.85205078125, "objective/train/value_reward_corr": 0.24494621667400718, "objective/train/value_std": 0.0203704833984375, "objective/train/weight_avg": 1.0003074407577515, "objective/train/weighted_lm_loss": 2.3483686447143555, "objective/train/weights_max": 1.0763732194900513, "objective/train/weights_min": 0.9117861986160278, "theoretical_loss": 3.3986951565805437, "tokens_seen": 2238316544 }, { "epoch": 0.35, "learning_rate": 0.0006547041707080504, "loss": 1.3127, "theoretical_loss": 3.3986951565805437, "tokens_seen": 2238316544 }, { "epoch": 0.35, "learning_rate": 0.0006543808600064662, "loss": 1.3457, "theoretical_loss": 3.3986302501049415, "tokens_seen": 2238840832 }, { "epoch": 0.35, "learning_rate": 0.0006540575493048821, "loss": 1.3351, "theoretical_loss": 3.398565363082027, "tokens_seen": 2239365120 }, { "epoch": 0.35, "learning_rate": 0.0006537342386032978, "loss": 1.3246, "theoretical_loss": 3.398500495501418, "tokens_seen": 2239889408 }, { "epoch": 0.35, "objective/train/advantage_avg": 0.001666651340201497, "objective/train/docs_used": 1268791, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.8906359672546387, "objective/train/original_loss": 2.8906357288360596, "objective/train/theoretical_loss": 3.3984923884203626, "objective/train/tokens_used": 598814176, "objective/train/value_avg": -0.0243988037109375, "objective/train/value_loss": 0.006461970508098602, "objective/train/value_max": -0.0005254745483398438, "objective/train/value_min": -0.99560546875, "objective/train/value_reward_corr": 0.5151102463533888, "objective/train/value_std": 0.060089111328125, "objective/train/weight_avg": 1.0001986026763916, "objective/train/weighted_lm_loss": 2.8926424980163574, "objective/train/weights_max": 1.0914891958236694, "objective/train/weights_min": 0.9066168665885925, "theoretical_loss": 3.3984923884203626, "tokens_seen": 2239954944 }, { "epoch": 0.35, "learning_rate": 0.0006534109279017136, "loss": 1.3146, "theoretical_loss": 3.39843564735274, "tokens_seen": 2240413696 }, { "epoch": 0.35, "learning_rate": 0.0006530876172001293, "loss": 1.3191, "theoretical_loss": 3.3983708186256267, "tokens_seen": 2240937984 }, { "epoch": 0.35, "learning_rate": 0.0006527643064985451, "loss": 1.3444, "theoretical_loss": 3.39830600930972, "tokens_seen": 2241462272 }, { "epoch": 0.35, "objective/train/advantage_avg": 0.006342026405036449, "objective/train/docs_used": 1270246, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.7987613677978516, "objective/train/original_loss": 2.7987611293792725, "objective/train/theoretical_loss": 3.398289810012531, "objective/train/tokens_used": 600452576, "objective/train/value_avg": -0.022796630859375, "objective/train/value_loss": 0.006213990971446037, "objective/train/value_max": -0.000507354736328125, "objective/train/value_min": -0.99853515625, "objective/train/value_reward_corr": 0.6192722845907991, "objective/train/value_std": 0.064697265625, "objective/train/weight_avg": 1.0006647109985352, "objective/train/weighted_lm_loss": 2.800931692123413, "objective/train/weights_max": 1.075618028640747, "objective/train/weights_min": 0.9071453809738159, "theoretical_loss": 3.398289810012531, "tokens_seen": 2241593344 }, { "epoch": 0.35, "learning_rate": 0.000652440995796961, "loss": 1.3251, "theoretical_loss": 3.3982412193946683, "tokens_seen": 2241986560 }, { "epoch": 0.35, "learning_rate": 0.0006521176850953767, "loss": 1.2734, "theoretical_loss": 3.39817644887013, "tokens_seen": 2242510848 }, { "epoch": 0.35, "learning_rate": 0.0006517943743937924, "loss": 1.2867, "theoretical_loss": 3.39811169772577, "tokens_seen": 2243035136 }, { "epoch": 0.35, "objective/train/advantage_avg": 0.004976190160959959, "objective/train/docs_used": 1270875, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 3.028968572616577, "objective/train/original_loss": 3.028968095779419, "objective/train/theoretical_loss": 3.3980874210409797, "objective/train/tokens_used": 602090976, "objective/train/value_avg": -0.0195465087890625, "objective/train/value_loss": 0.004104113671928644, "objective/train/value_max": -0.0006537437438964844, "objective/train/value_min": -0.65283203125, "objective/train/value_reward_corr": 0.6069987997966702, "objective/train/value_std": 0.033599853515625, "objective/train/weight_avg": 1.000517725944519, "objective/train/weighted_lm_loss": 3.0309090614318848, "objective/train/weights_max": 1.0446877479553223, "objective/train/weights_min": 0.9179182052612305, "theoretical_loss": 3.3980874210409797, "tokens_seen": 2243231744 }, { "epoch": 0.36, "learning_rate": 0.0006514710636922082, "loss": 1.3035, "theoretical_loss": 3.3980469659512624, "tokens_seen": 2243559424 }, { "epoch": 0.36, "learning_rate": 0.000651147752990624, "loss": 1.2805, "theoretical_loss": 3.3979822535362874, "tokens_seen": 2244083712 }, { "epoch": 0.36, "learning_rate": 0.0006508244422890399, "loss": 1.3066, "theoretical_loss": 3.397917560470535, "tokens_seen": 2244608000 }, { "epoch": 0.36, "objective/train/advantage_avg": 0.0033611988183110952, "objective/train/docs_used": 1272103, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.91682505607605, "objective/train/original_loss": 2.916825532913208, "objective/train/theoretical_loss": 3.397885221190397, "objective/train/tokens_used": 603729376, "objective/train/value_avg": -0.0178070068359375, "objective/train/value_loss": 0.002501885872334242, "objective/train/value_max": -0.0007352828979492188, "objective/train/value_min": -0.97265625, "objective/train/value_reward_corr": 0.6739492416153736, "objective/train/value_std": 0.03631591796875, "objective/train/weight_avg": 1.000348448753357, "objective/train/weighted_lm_loss": 2.9185309410095215, "objective/train/weights_max": 1.0327850580215454, "objective/train/weights_min": 0.9142346382141113, "theoretical_loss": 3.397885221190397, "tokens_seen": 2244870144 }, { "epoch": 0.36, "learning_rate": 0.0006505011315874556, "loss": 1.271, "theoretical_loss": 3.3978528867437015, "tokens_seen": 2245132288 }, { "epoch": 0.36, "learning_rate": 0.0006501778208858713, "loss": 1.3057, "theoretical_loss": 3.3977882323454924, "tokens_seen": 2245656576 }, { "epoch": 0.36, "learning_rate": 0.0006498545101842871, "loss": 1.31, "theoretical_loss": 3.39772359726562, "tokens_seen": 2246180864 }, { "epoch": 0.36, "objective/train/advantage_avg": 0.0037488206289708614, "objective/train/docs_used": 1272482, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.65596079826355, "objective/train/original_loss": 2.65596079826355, "objective/train/theoretical_loss": 3.3976832101462247, "objective/train/tokens_used": 605367776, "objective/train/value_avg": -0.02203369140625, "objective/train/value_loss": 0.004503023810684681, "objective/train/value_max": -0.0005173683166503906, "objective/train/value_min": -0.8876953125, "objective/train/value_reward_corr": 0.3675169435152782, "objective/train/value_std": 0.02978515625, "objective/train/weight_avg": 1.0003970861434937, "objective/train/weighted_lm_loss": 2.6580326557159424, "objective/train/weights_max": 1.0780222415924072, "objective/train/weights_min": 0.9071085453033447, "theoretical_loss": 3.3976832101462247, "tokens_seen": 2246508544 }, { "epoch": 0.36, "learning_rate": 0.0006495311994827029, "loss": 1.2921, "theoretical_loss": 3.397658981493805, "tokens_seen": 2246705152 }, { "epoch": 0.36, "learning_rate": 0.0006492078887811186, "loss": 1.2818, "theoretical_loss": 3.3975943850197763, "tokens_seen": 2247229440 }, { "epoch": 0.36, "learning_rate": 0.0006488845780795345, "loss": 1.3414, "theoretical_loss": 3.39752980783327, "tokens_seen": 2247753728 }, { "epoch": 0.36, "objective/train/advantage_avg": 0.008920294232666492, "objective/train/docs_used": 1273740, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.9368462562561035, "objective/train/original_loss": 2.9368462562561035, "objective/train/theoretical_loss": 3.3974813875946577, "objective/train/tokens_used": 607006176, "objective/train/value_avg": -0.0160064697265625, "objective/train/value_loss": 0.0014400738291442394, "objective/train/value_max": -0.0009145736694335938, "objective/train/value_min": -0.406982421875, "objective/train/value_reward_corr": 0.35277411338134784, "objective/train/value_std": 0.0157470703125, "objective/train/weight_avg": 1.000899076461792, "objective/train/weighted_lm_loss": 2.9402849674224854, "objective/train/weights_max": 1.0311428308486938, "objective/train/weights_min": 0.9392551183700562, "theoretical_loss": 3.3974813875946577, "tokens_seen": 2248146944 }, { "epoch": 0.36, "learning_rate": 0.0006485612673779502, "loss": 1.3064, "theoretical_loss": 3.3974652499240308, "tokens_seen": 2248278016 }, { "epoch": 0.36, "learning_rate": 0.000648237956676366, "loss": 1.3127, "theoretical_loss": 3.3974007112818105, "tokens_seen": 2248802304 }, { "epoch": 0.36, "learning_rate": 0.0006479146459747818, "loss": 1.3379, "theoretical_loss": 3.3973361918963683, "tokens_seen": 2249326592 }, { "epoch": 0.36, "objective/train/advantage_avg": 0.006243350449949503, "objective/train/docs_used": 1274460, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.4632973670959473, "objective/train/original_loss": 2.463297128677368, "objective/train/theoretical_loss": 3.397279753222639, "objective/train/tokens_used": 608644576, "objective/train/value_avg": -0.0179595947265625, "objective/train/value_loss": 0.0009255858021788299, "objective/train/value_max": -0.0005273818969726562, "objective/train/value_min": -0.60546875, "objective/train/value_reward_corr": 0.43574881493781276, "objective/train/value_std": 0.0159454345703125, "objective/train/weight_avg": 1.00062894821167, "objective/train/weighted_lm_loss": 2.4655425548553467, "objective/train/weights_max": 1.05464768409729, "objective/train/weights_min": 0.9631949067115784, "theoretical_loss": 3.397279753222639, "tokens_seen": 2249785344 }, { "epoch": 0.36, "learning_rate": 0.0006475913352731975, "loss": 1.2675, "theoretical_loss": 3.3972716917574735, "tokens_seen": 2249850880 }, { "epoch": 0.36, "learning_rate": 0.0006472680245716134, "loss": 1.2959, "theoretical_loss": 3.3972072108549, "tokens_seen": 2250375168 }, { "epoch": 0.36, "learning_rate": 0.000646944713870029, "loss": 1.2991, "theoretical_loss": 3.3971427491784323, "tokens_seen": 2250899456 }, { "epoch": 0.36, "objective/train/advantage_avg": 0.003283695550635457, "objective/train/docs_used": 1275535, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.6301774978637695, "objective/train/original_loss": 2.6301767826080322, "objective/train/theoretical_loss": 3.3970783067178614, "objective/train/tokens_used": 610282976, "objective/train/value_avg": -0.0184173583984375, "objective/train/value_loss": 0.004644853062927723, "objective/train/value_max": -0.0005974769592285156, "objective/train/value_min": -0.849609375, "objective/train/value_reward_corr": 0.5531100496526042, "objective/train/value_std": 0.0345458984375, "objective/train/weight_avg": 1.0003511905670166, "objective/train/weighted_lm_loss": 2.631376028060913, "objective/train/weights_max": 1.0319766998291016, "objective/train/weights_min": 0.9101042747497559, "theoretical_loss": 3.3970783067178614, "tokens_seen": 2251423744 }, { "epoch": 0.36, "learning_rate": 0.0006466214031684449, "loss": 1.3481, "theoretical_loss": 3.3970783067178614, "tokens_seen": 2251423744 }, { "epoch": 0.36, "learning_rate": 0.0006462980924668607, "loss": 1.3573, "theoretical_loss": 3.3970138834629857, "tokens_seen": 2251948032 }, { "epoch": 0.36, "learning_rate": 0.0006459747817652764, "loss": 1.3233, "theoretical_loss": 3.396949479403613, "tokens_seen": 2252472320 }, { "epoch": 0.36, "learning_rate": 0.0006456514710636923, "loss": 1.3336, "theoretical_loss": 3.396885094529557, "tokens_seen": 2252996608 }, { "epoch": 0.36, "objective/train/advantage_avg": 0.007103523705154657, "objective/train/docs_used": 1276002, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.4225523471832275, "objective/train/original_loss": 2.4225521087646484, "objective/train/theoretical_loss": 3.3968770477687604, "objective/train/tokens_used": 611921376, "objective/train/value_avg": -0.01448822021484375, "objective/train/value_loss": 0.0007017153548076749, "objective/train/value_max": -0.000469207763671875, "objective/train/value_min": -0.68994140625, "objective/train/value_reward_corr": 0.24329899599079108, "objective/train/value_std": 0.0183868408203125, "objective/train/weight_avg": 1.00071382522583, "objective/train/weighted_lm_loss": 2.4249119758605957, "objective/train/weights_max": 1.0602138042449951, "objective/train/weights_min": 0.9753049612045288, "theoretical_loss": 3.3968770477687604, "tokens_seen": 2253062144 }, { "epoch": 0.36, "learning_rate": 0.000645328160362108, "loss": 1.2988, "theoretical_loss": 3.3968207288306402, "tokens_seen": 2253520896 }, { "epoch": 0.36, "learning_rate": 0.0006450048496605237, "loss": 1.3379, "theoretical_loss": 3.396756382296693, "tokens_seen": 2254045184 }, { "epoch": 0.36, "learning_rate": 0.0006446815389589396, "loss": 1.3001, "theoretical_loss": 3.3966920549175526, "tokens_seen": 2254569472 }, { "epoch": 0.36, "objective/train/advantage_avg": 0.005153324920684099, "objective/train/docs_used": 1277060, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.211369276046753, "objective/train/original_loss": 2.211369752883911, "objective/train/theoretical_loss": 3.396675976064516, "objective/train/tokens_used": 613559776, "objective/train/value_avg": -0.01336669921875, "objective/train/value_loss": 0.0016389585798606277, "objective/train/value_max": -0.0009145736694335938, "objective/train/value_min": -0.44677734375, "objective/train/value_reward_corr": 0.2641915258710057, "objective/train/value_std": 0.0123748779296875, "objective/train/weight_avg": 1.0005234479904175, "objective/train/weighted_lm_loss": 2.212738275527954, "objective/train/weights_max": 1.026222825050354, "objective/train/weights_min": 0.9272564649581909, "theoretical_loss": 3.396675976064516, "tokens_seen": 2254700544 }, { "epoch": 0.36, "learning_rate": 0.0006443582282573553, "loss": 1.3128, "theoretical_loss": 3.396627746683065, "tokens_seen": 2255093760 }, { "epoch": 0.36, "learning_rate": 0.0006440349175557712, "loss": 1.3278, "theoretical_loss": 3.3965634575830843, "tokens_seen": 2255618048 }, { "epoch": 0.36, "learning_rate": 0.0006437116068541868, "loss": 1.334, "theoretical_loss": 3.3964991876074704, "tokens_seen": 2256142336 }, { "epoch": 0.36, "objective/train/advantage_avg": 0.007392835803329945, "objective/train/docs_used": 1277665, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.808966875076294, "objective/train/original_loss": 2.808966875076294, "objective/train/theoretical_loss": 3.3964750912950477, "objective/train/tokens_used": 615198176, "objective/train/value_avg": -0.0185699462890625, "objective/train/value_loss": 0.0019034873694181442, "objective/train/value_max": -0.000293731689453125, "objective/train/value_min": -0.95263671875, "objective/train/value_reward_corr": 0.6150755287439202, "objective/train/value_std": 0.03704833984375, "objective/train/weight_avg": 1.0007487535476685, "objective/train/weighted_lm_loss": 2.811915636062622, "objective/train/weights_max": 1.042252779006958, "objective/train/weights_min": 0.9329053163528442, "theoretical_loss": 3.3964750912950477, "tokens_seen": 2256338944 }, { "epoch": 0.36, "learning_rate": 0.0006433882961526026, "loss": 1.3299, "theoretical_loss": 3.3964349367460924, "tokens_seen": 2256666624 }, { "epoch": 0.36, "learning_rate": 0.0006430649854510185, "loss": 1.3423, "theoretical_loss": 3.396370704988827, "tokens_seen": 2257190912 }, { "epoch": 0.36, "learning_rate": 0.0006427416747494342, "loss": 1.3682, "theoretical_loss": 3.3963064923255586, "tokens_seen": 2257715200 }, { "epoch": 0.36, "objective/train/advantage_avg": 0.0077814036048948765, "objective/train/docs_used": 1279074, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 1.9466900825500488, "objective/train/original_loss": 1.9466902017593384, "objective/train/theoretical_loss": 3.396274393151014, "objective/train/tokens_used": 616836576, "objective/train/value_avg": -0.0202178955078125, "objective/train/value_loss": 0.0031105380039662123, "objective/train/value_max": -0.00046181678771972656, "objective/train/value_min": -0.97412109375, "objective/train/value_reward_corr": 0.5906074152658257, "objective/train/value_std": 0.05035400390625, "objective/train/weight_avg": 1.0007935762405396, "objective/train/weighted_lm_loss": 1.9486134052276611, "objective/train/weights_max": 1.056373953819275, "objective/train/weights_min": 0.9110721945762634, "theoretical_loss": 3.396274393151014, "tokens_seen": 2257977344 }, { "epoch": 0.36, "learning_rate": 0.00064241836404785, "loss": 1.3347, "theoretical_loss": 3.396242298746179, "tokens_seen": 2258239488 }, { "epoch": 0.36, "learning_rate": 0.0006420950533462657, "loss": 1.3312, "theoretical_loss": 3.3961781242405875, "tokens_seen": 2258763776 }, { "epoch": 0.36, "learning_rate": 0.0006417717426446815, "loss": 1.2922, "theoretical_loss": 3.396113968798692, "tokens_seen": 2259288064 }, { "epoch": 0.36, "objective/train/advantage_avg": -0.0021721364464610815, "objective/train/docs_used": 1279794, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.6056716442108154, "objective/train/original_loss": 2.6056716442108154, "objective/train/theoretical_loss": 3.3960738813238085, "objective/train/tokens_used": 618474976, "objective/train/value_avg": -0.016845703125, "objective/train/value_loss": 0.006824462674558163, "objective/train/value_max": -0.0005860328674316406, "objective/train/value_min": -0.7041015625, "objective/train/value_reward_corr": 0.46636236632552813, "objective/train/value_std": 0.027496337890625, "objective/train/weight_avg": 0.9998162388801575, "objective/train/weighted_lm_loss": 2.6047070026397705, "objective/train/weights_max": 1.0348217487335205, "objective/train/weights_min": 0.9099257588386536, "theoretical_loss": 3.3960738813238085, "tokens_seen": 2259615744 }, { "epoch": 0.37, "learning_rate": 0.0006414484319430974, "loss": 1.2979, "theoretical_loss": 3.396049832410407, "tokens_seen": 2259812352 }, { "epoch": 0.37, "learning_rate": 0.0006411251212415131, "loss": 1.2895, "theoretical_loss": 3.395985715065655, "tokens_seen": 2260336640 }, { "epoch": 0.37, "learning_rate": 0.0006408018105399289, "loss": 1.3358, "theoretical_loss": 3.3959216167543675, "tokens_seen": 2260860928 }, { "epoch": 0.37, "objective/train/advantage_avg": 0.004949556197971106, "objective/train/docs_used": 1281070, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.5077943801879883, "objective/train/original_loss": 2.5077946186065674, "objective/train/theoretical_loss": 3.395873555505559, "objective/train/tokens_used": 620113376, "objective/train/value_avg": -0.026031494140625, "objective/train/value_loss": 0.007205890957266092, "objective/train/value_max": -0.00043892860412597656, "objective/train/value_min": -0.96875, "objective/train/value_reward_corr": 0.5397803633358934, "objective/train/value_std": 0.050323486328125, "objective/train/weight_avg": 1.0005302429199219, "objective/train/weighted_lm_loss": 2.5083794593811035, "objective/train/weights_max": 1.062050461769104, "objective/train/weights_min": 0.9081803560256958, "theoretical_loss": 3.395873555505559, "tokens_seen": 2261254144 }, { "epoch": 0.37, "learning_rate": 0.0006404784998383446, "loss": 1.3012, "theoretical_loss": 3.3958575374664814, "tokens_seen": 2261385216 }, { "epoch": 0.37, "learning_rate": 0.0006401551891367604, "loss": 1.3103, "theoretical_loss": 3.3957934771919427, "tokens_seen": 2261909504 }, { "epoch": 0.37, "learning_rate": 0.0006398318784351762, "loss": 1.2952, "theoretical_loss": 3.3957294359207046, "tokens_seen": 2262433792 }, { "epoch": 0.37, "objective/train/advantage_avg": 0.013710260391235352, "objective/train/docs_used": 1281765, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 3.0826213359832764, "objective/train/original_loss": 3.082620859146118, "objective/train/theoretical_loss": 3.3956734153891244, "objective/train/tokens_used": 621751776, "objective/train/value_avg": -0.0269317626953125, "objective/train/value_loss": 0.003842971520498395, "objective/train/value_max": -0.0009622573852539062, "objective/train/value_min": -0.95751953125, "objective/train/value_reward_corr": 0.5337806907482162, "objective/train/value_std": 0.0418701171875, "objective/train/weight_avg": 1.0013898611068726, "objective/train/weighted_lm_loss": 3.08622407913208, "objective/train/weights_max": 1.0311484336853027, "objective/train/weights_min": 0.9097120761871338, "theoretical_loss": 3.3956734153891244, "tokens_seen": 2262892544 }, { "epoch": 0.37, "learning_rate": 0.000639508567733592, "loss": 1.293, "theoretical_loss": 3.3956654136427282, "tokens_seen": 2262958080 }, { "epoch": 0.37, "learning_rate": 0.0006391852570320078, "loss": 1.2771, "theoretical_loss": 3.395601410347982, "tokens_seen": 2263482368 }, { "epoch": 0.37, "learning_rate": 0.0006388619463304236, "loss": 1.3136, "theoretical_loss": 3.3955374260264417, "tokens_seen": 2264006656 }, { "epoch": 0.37, "objective/train/advantage_avg": 0.0080293333157897, "objective/train/docs_used": 1283066, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 3.3369812965393066, "objective/train/original_loss": 3.3369810581207275, "objective/train/theoretical_loss": 3.395473460668092, "objective/train/tokens_used": 623390176, "objective/train/value_avg": -0.0183258056640625, "objective/train/value_loss": 0.0027535397093743086, "objective/train/value_max": -0.00036835670471191406, "objective/train/value_min": -0.9697265625, "objective/train/value_reward_corr": 0.560550697156256, "objective/train/value_std": 0.033843994140625, "objective/train/weight_avg": 1.0008165836334229, "objective/train/weighted_lm_loss": 3.340176820755005, "objective/train/weights_max": 1.0707368850708008, "objective/train/weights_min": 0.9280697107315063, "theoretical_loss": 3.395473460668092, "tokens_seen": 2264530944 }, { "epoch": 0.37, "learning_rate": 0.0006385386356288393, "loss": 1.3181, "theoretical_loss": 3.395473460668092, "tokens_seen": 2264530944 }, { "epoch": 0.37, "learning_rate": 0.000638215324927255, "loss": 1.2885, "theoretical_loss": 3.3954095142629237, "tokens_seen": 2265055232 }, { "epoch": 0.37, "learning_rate": 0.0006378920142256709, "loss": 1.318, "theoretical_loss": 3.3953455868009357, "tokens_seen": 2265579520 }, { "epoch": 0.37, "learning_rate": 0.0006375687035240867, "loss": 1.3161, "theoretical_loss": 3.395281678272135, "tokens_seen": 2266103808 }, { "epoch": 0.37, "objective/train/advantage_avg": 0.0013348660431802273, "objective/train/docs_used": 1283537, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.7272071838378906, "objective/train/original_loss": 2.7272071838378906, "objective/train/theoretical_loss": 3.395273691036777, "objective/train/tokens_used": 625028576, "objective/train/value_avg": -0.0239105224609375, "objective/train/value_loss": 0.003420903580263257, "objective/train/value_max": -0.0003514289855957031, "objective/train/value_min": -0.9658203125, "objective/train/value_reward_corr": 0.6715400869345712, "objective/train/value_std": 0.050537109375, "objective/train/weight_avg": 1.0001503229141235, "objective/train/weighted_lm_loss": 2.728060722351074, "objective/train/weights_max": 1.05832839012146, "objective/train/weights_min": 0.9082136750221252, "theoretical_loss": 3.395273691036777, "tokens_seen": 2266169344 }, { "epoch": 0.37, "learning_rate": 0.0006372453928225025, "loss": 1.3039, "theoretical_loss": 3.3952177886665353, "tokens_seen": 2266628096 }, { "epoch": 0.37, "learning_rate": 0.0006369220821209182, "loss": 1.353, "theoretical_loss": 3.3951539179741586, "tokens_seen": 2267152384 }, { "epoch": 0.37, "learning_rate": 0.000636598771419334, "loss": 1.2865, "theoretical_loss": 3.3950900661850345, "tokens_seen": 2267676672 }, { "epoch": 0.37, "objective/train/advantage_avg": 0.0061454083770513535, "objective/train/docs_used": 1284224, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.5290439128875732, "objective/train/original_loss": 2.5290439128875732, "objective/train/theoretical_loss": 3.395074106190219, "objective/train/tokens_used": 626666976, "objective/train/value_avg": -0.016326904296875, "objective/train/value_loss": 0.0007455444429069757, "objective/train/value_max": -0.00036978721618652344, "objective/train/value_min": -0.80224609375, "objective/train/value_reward_corr": 0.7286249977470557, "objective/train/value_std": 0.02252197265625, "objective/train/weight_avg": 1.0006182193756104, "objective/train/weighted_lm_loss": 2.5315518379211426, "objective/train/weights_max": 1.0342785120010376, "objective/train/weights_min": 0.9342418909072876, "theoretical_loss": 3.395074106190219, "tokens_seen": 2267807744 }, { "epoch": 0.37, "learning_rate": 0.0006362754607177498, "loss": 1.274, "theoretical_loss": 3.3950262332891996, "tokens_seen": 2268200960 }, { "epoch": 0.37, "learning_rate": 0.0006359521500161656, "loss": 1.2826, "theoretical_loss": 3.394962419276698, "tokens_seen": 2268725248 }, { "epoch": 0.37, "learning_rate": 0.0006356288393145813, "loss": 1.321, "theoretical_loss": 3.394898624137582, "tokens_seen": 2269249536 }, { "epoch": 0.37, "objective/train/advantage_avg": 0.007945096120238304, "objective/train/docs_used": 1285503, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.7533724308013916, "objective/train/original_loss": 2.7533724308013916, "objective/train/theoretical_loss": 3.3948747058241793, "objective/train/tokens_used": 628305376, "objective/train/value_avg": -0.02191162109375, "objective/train/value_loss": 0.004334697965532541, "objective/train/value_max": -0.00034332275390625, "objective/train/value_min": -0.966796875, "objective/train/value_reward_corr": 0.393153467993368, "objective/train/value_std": 0.042724609375, "objective/train/weight_avg": 1.0008158683776855, "objective/train/weighted_lm_loss": 2.7559025287628174, "objective/train/weights_max": 1.0855756998062134, "objective/train/weights_min": 0.9078043699264526, "theoretical_loss": 3.3948747058241793, "tokens_seen": 2269446144 }, { "epoch": 0.37, "learning_rate": 0.0006353055286129971, "loss": 1.3707, "theoretical_loss": 3.3948348478619113, "tokens_seen": 2269773824 }, { "epoch": 0.37, "learning_rate": 0.0006349822179114128, "loss": 1.3163, "theoretical_loss": 3.394771090439752, "tokens_seen": 2270298112 }, { "epoch": 0.37, "learning_rate": 0.0006346589072098287, "loss": 1.3062, "theoretical_loss": 3.39470735186118, "tokens_seen": 2270822400 }, { "epoch": 0.37, "objective/train/advantage_avg": 0.0050528752617537975, "objective/train/docs_used": 1286076, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 3.07955265045166, "objective/train/original_loss": 3.0795528888702393, "objective/train/theoretical_loss": 3.3946754896351394, "objective/train/tokens_used": 629943776, "objective/train/value_avg": -0.0218658447265625, "objective/train/value_loss": 0.004461497068405151, "objective/train/value_max": -0.00036263465881347656, "objective/train/value_min": -0.98779296875, "objective/train/value_reward_corr": 0.48832768772065294, "objective/train/value_std": 0.037200927734375, "objective/train/weight_avg": 1.0005271434783936, "objective/train/weighted_lm_loss": 3.0813522338867188, "objective/train/weights_max": 1.039657473564148, "objective/train/weights_min": 0.9088773131370544, "theoretical_loss": 3.3946754896351394, "tokens_seen": 2271084544 }, { "epoch": 0.37, "learning_rate": 0.0006343355965082445, "loss": 1.3027, "theoretical_loss": 3.394643632116277, "tokens_seen": 2271346688 }, { "epoch": 0.37, "learning_rate": 0.0006340122858066602, "loss": 1.2653, "theoretical_loss": 3.394579931195132, "tokens_seen": 2271870976 }, { "epoch": 0.37, "learning_rate": 0.000633688975105076, "loss": 1.3471, "theoretical_loss": 3.3945162490878418, "tokens_seen": 2272395264 }, { "epoch": 0.37, "objective/train/advantage_avg": 0.00317620811983943, "objective/train/docs_used": 1287335, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.836552858352661, "objective/train/original_loss": 2.8365533351898193, "objective/train/theoretical_loss": 3.3944764573202995, "objective/train/tokens_used": 631582176, "objective/train/value_avg": -0.020843505859375, "objective/train/value_loss": 0.004174229688942432, "objective/train/value_max": -0.0002512931823730469, "objective/train/value_min": -0.88916015625, "objective/train/value_reward_corr": 0.4407372151123287, "objective/train/value_std": 0.0292510986328125, "objective/train/weight_avg": 1.0003381967544556, "objective/train/weighted_lm_loss": 2.8374249935150146, "objective/train/weights_max": 1.0505467653274536, "objective/train/weights_min": 0.9187308549880981, "theoretical_loss": 3.3944764573202995, "tokens_seen": 2272722944 }, { "epoch": 0.37, "learning_rate": 0.0006333656644034917, "loss": 1.3439, "theoretical_loss": 3.3944525857845123, "tokens_seen": 2272919552 }, { "epoch": 0.37, "learning_rate": 0.0006330423537019075, "loss": 1.2781, "theoretical_loss": 3.3943889412752544, "tokens_seen": 2273443840 }, { "epoch": 0.37, "learning_rate": 0.0006327190430003234, "loss": 1.3085, "theoretical_loss": 3.394325315550188, "tokens_seen": 2273968128 }, { "epoch": 0.37, "objective/train/advantage_avg": 0.008295581676065922, "objective/train/docs_used": 1288139, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.92570161819458, "objective/train/original_loss": 2.9257020950317383, "objective/train/theoretical_loss": 3.3942776085775743, "objective/train/tokens_used": 633220576, "objective/train/value_avg": -0.01409912109375, "objective/train/value_loss": 0.0012530575040727854, "objective/train/value_max": -0.0004372596740722656, "objective/train/value_min": -0.6240234375, "objective/train/value_reward_corr": 0.4734326912858569, "objective/train/value_std": 0.01708984375, "objective/train/weight_avg": 1.0008357763290405, "objective/train/weighted_lm_loss": 2.92848801612854, "objective/train/weights_max": 1.0186539888381958, "objective/train/weights_min": 0.9449458122253418, "theoretical_loss": 3.3942776085775743, "tokens_seen": 2274361344 }, { "epoch": 0.37, "learning_rate": 0.0006323957322987391, "loss": 1.3091, "theoretical_loss": 3.39426170859944, "tokens_seen": 2274492416 }, { "epoch": 0.37, "learning_rate": 0.0006320724215971549, "loss": 1.3261, "theoretical_loss": 3.3941981204131455, "tokens_seen": 2275016704 }, { "epoch": 0.37, "learning_rate": 0.0006317491108955706, "loss": 1.2766, "theoretical_loss": 3.3941345509814456, "tokens_seen": 2275540992 }, { "epoch": 0.37, "objective/train/advantage_avg": 0.003752990160137415, "objective/train/docs_used": 1289239, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.9208498001098633, "objective/train/original_loss": 2.920849561691284, "objective/train/theoretical_loss": 3.394078943105593, "objective/train/tokens_used": 634858976, "objective/train/value_avg": -0.041107177734375, "objective/train/value_loss": 0.00868811085820198, "objective/train/value_max": -0.0008797645568847656, "objective/train/value_min": -0.794921875, "objective/train/value_reward_corr": 0.5668892737015173, "objective/train/value_std": 0.083251953125, "objective/train/weight_avg": 1.0004184246063232, "objective/train/weighted_lm_loss": 2.92264723777771, "objective/train/weights_max": 1.0534183979034424, "objective/train/weights_min": 0.9198768734931946, "theoretical_loss": 3.394078943105593, "tokens_seen": 2275999744 }, { "epoch": 0.38, "learning_rate": 0.0006314258001939864, "loss": 1.3559, "theoretical_loss": 3.3940710002944905, "tokens_seen": 2276065280 }, { "epoch": 0.38, "learning_rate": 0.0006311024894924023, "loss": 1.3004, "theoretical_loss": 3.394007468342436, "tokens_seen": 2276589568 }, { "epoch": 0.38, "learning_rate": 0.000630779178790818, "loss": 1.3063, "theoretical_loss": 3.3939439551154473, "tokens_seen": 2277113856 }, { "epoch": 0.38, "objective/train/advantage_avg": -0.010895474813878536, "objective/train/docs_used": 1289790, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.837294340133667, "objective/train/original_loss": 2.837294340133667, "objective/train/theoretical_loss": 3.393880460603695, "objective/train/tokens_used": 636497376, "objective/train/value_avg": -0.025634765625, "objective/train/value_loss": 0.009381589479744434, "objective/train/value_max": -0.0004546642303466797, "objective/train/value_min": -0.98876953125, "objective/train/value_reward_corr": 0.6745022598028155, "objective/train/value_std": 0.047576904296875, "objective/train/weight_avg": 0.9989565014839172, "objective/train/weighted_lm_loss": 2.833341360092163, "objective/train/weights_max": 1.0487943887710571, "objective/train/weights_min": 0.9076473116874695, "theoretical_loss": 3.393880460603695, "tokens_seen": 2277638144 }, { "epoch": 0.38, "learning_rate": 0.0006304558680892337, "loss": 1.3124, "theoretical_loss": 3.393880460603695, "tokens_seen": 2277638144 }, { "epoch": 0.38, "learning_rate": 0.0006301325573876495, "loss": 1.3302, "theoretical_loss": 3.3938169847973594, "tokens_seen": 2278162432 }, { "epoch": 0.38, "learning_rate": 0.0006298092466860653, "loss": 1.297, "theoretical_loss": 3.3937535276866266, "tokens_seen": 2278686720 }, { "epoch": 0.38, "learning_rate": 0.0006294859359844812, "loss": 1.2896, "theoretical_loss": 3.39369008926169, "tokens_seen": 2279211008 }, { "epoch": 0.38, "objective/train/advantage_avg": 0.004683047998696566, "objective/train/docs_used": 1290983, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.5431320667266846, "objective/train/original_loss": 2.5431318283081055, "objective/train/theoretical_loss": 3.39368216077193, "objective/train/tokens_used": 638135776, "objective/train/value_avg": -0.0178070068359375, "objective/train/value_loss": 0.0024618355091661215, "objective/train/value_max": -0.0003006458282470703, "objective/train/value_min": -0.5322265625, "objective/train/value_reward_corr": 0.4622492853783573, "objective/train/value_std": 0.02789306640625, "objective/train/weight_avg": 1.0004804134368896, "objective/train/weighted_lm_loss": 2.544468879699707, "objective/train/weights_max": 1.054129958152771, "objective/train/weights_min": 0.9216017127037048, "theoretical_loss": 3.39368216077193, "tokens_seen": 2279276544 }, { "epoch": 0.38, "learning_rate": 0.0006291626252828969, "loss": 1.2881, "theoretical_loss": 3.3936266695127513, "tokens_seen": 2279735296 }, { "epoch": 0.38, "learning_rate": 0.0006288393145813126, "loss": 1.3324, "theoretical_loss": 3.3935632684300194, "tokens_seen": 2280259584 }, { "epoch": 0.38, "learning_rate": 0.0006285160038797284, "loss": 1.3575, "theoretical_loss": 3.39349988600371, "tokens_seen": 2280783872 }, { "epoch": 0.38, "objective/train/advantage_avg": 0.0048992992378771305, "objective/train/docs_used": 1291619, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.7493646144866943, "objective/train/original_loss": 2.7493650913238525, "objective/train/theoretical_loss": 3.3934840433110534, "objective/train/tokens_used": 639774176, "objective/train/value_avg": -0.0199432373046875, "objective/train/value_loss": 0.0038853902369737625, "objective/train/value_max": -0.0004897117614746094, "objective/train/value_min": -0.99169921875, "objective/train/value_reward_corr": 0.5123918386637569, "objective/train/value_std": 0.042724609375, "objective/train/weight_avg": 1.0005091428756714, "objective/train/weighted_lm_loss": 2.751206159591675, "objective/train/weights_max": 1.0669753551483154, "objective/train/weights_min": 0.9067533612251282, "theoretical_loss": 3.3934840433110534, "tokens_seen": 2280914944 }, { "epoch": 0.38, "learning_rate": 0.0006281926931781442, "loss": 1.2866, "theoretical_loss": 3.3934365222240475, "tokens_seen": 2281308160 }, { "epoch": 0.38, "learning_rate": 0.0006278693824765601, "loss": 1.3176, "theoretical_loss": 3.393373177081261, "tokens_seen": 2281832448 }, { "epoch": 0.38, "learning_rate": 0.0006275460717749758, "loss": 1.3299, "theoretical_loss": 3.39330985056559, "tokens_seen": 2282356736 }, { "epoch": 0.38, "objective/train/advantage_avg": -0.0006051029777154326, "objective/train/docs_used": 1292123, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.031052589416504, "objective/train/original_loss": 2.031052350997925, "objective/train/theoretical_loss": 3.3932861079225254, "objective/train/tokens_used": 641412576, "objective/train/value_avg": -0.0216217041015625, "objective/train/value_loss": 0.004949938040226698, "objective/train/value_max": -0.00051116943359375, "objective/train/value_min": -0.97314453125, "objective/train/value_reward_corr": 0.4782994079746493, "objective/train/value_std": 0.039154052734375, "objective/train/weight_avg": 0.9999638199806213, "objective/train/weighted_lm_loss": 2.031369209289551, "objective/train/weights_max": 1.0853406190872192, "objective/train/weights_min": 0.9078819155693054, "theoretical_loss": 3.3932861079225254, "tokens_seen": 2282553344 }, { "epoch": 0.38, "learning_rate": 0.0006272227610733915, "loss": 1.3132, "theoretical_loss": 3.39324654266728, "tokens_seen": 2282881024 }, { "epoch": 0.38, "learning_rate": 0.0006268994503718073, "loss": 1.3262, "theoretical_loss": 3.3931832533765833, "tokens_seen": 2283405312 }, { "epoch": 0.38, "learning_rate": 0.0006265761396702231, "loss": 1.3137, "theoretical_loss": 3.3931199826837606, "tokens_seen": 2283929600 }, { "epoch": 0.38, "objective/train/advantage_avg": -0.001550885383039713, "objective/train/docs_used": 1293247, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.7987465858459473, "objective/train/original_loss": 2.7987473011016846, "objective/train/theoretical_loss": 3.39308835430851, "objective/train/tokens_used": 643050976, "objective/train/value_avg": -0.03472900390625, "objective/train/value_loss": 0.00985274650156498, "objective/train/value_max": -0.0005440711975097656, "objective/train/value_min": -0.98974609375, "objective/train/value_reward_corr": 0.6083740052593074, "objective/train/value_std": 0.057464599609375, "objective/train/weight_avg": 0.9998934268951416, "objective/train/weighted_lm_loss": 2.799090623855591, "objective/train/weights_max": 1.0463451147079468, "objective/train/weights_min": 0.9069433212280273, "theoretical_loss": 3.39308835430851, "tokens_seen": 2284191744 }, { "epoch": 0.38, "learning_rate": 0.0006262528289686388, "loss": 1.2976, "theoretical_loss": 3.393056730579079, "tokens_seen": 2284453888 }, { "epoch": 0.38, "learning_rate": 0.0006259295182670547, "loss": 1.292, "theoretical_loss": 3.3929934970528137, "tokens_seen": 2284978176 }, { "epoch": 0.38, "learning_rate": 0.0006256062075654704, "loss": 1.3284, "theoretical_loss": 3.3929302820952474, "tokens_seen": 2285502464 }, { "epoch": 0.38, "objective/train/advantage_avg": -0.004382600542157888, "objective/train/docs_used": 1293760, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.565077543258667, "objective/train/original_loss": 2.5650768280029297, "objective/train/theoretical_loss": 3.3928907821718703, "objective/train/tokens_used": 644689376, "objective/train/value_avg": -0.032440185546875, "objective/train/value_loss": 0.00511235510930419, "objective/train/value_max": -0.0003528594970703125, "objective/train/value_min": -0.59619140625, "objective/train/value_reward_corr": 0.8692493463340826, "objective/train/value_std": 0.067138671875, "objective/train/weight_avg": 0.9995870590209961, "objective/train/weighted_lm_loss": 2.563526153564453, "objective/train/weights_max": 1.035616397857666, "objective/train/weights_min": 0.9217349886894226, "theoretical_loss": 3.3928907821718703, "tokens_seen": 2285830144 }, { "epoch": 0.38, "learning_rate": 0.0006252828968638862, "loss": 1.3392, "theoretical_loss": 3.3928670856966687, "tokens_seen": 2286026752 }, { "epoch": 0.38, "learning_rate": 0.000624959586162302, "loss": 1.3538, "theoretical_loss": 3.392803907847375, "tokens_seen": 2286551040 }, { "epoch": 0.38, "learning_rate": 0.0006246362754607177, "loss": 1.3005, "theoretical_loss": 3.39274074853767, "tokens_seen": 2287075328 }, { "epoch": 0.38, "objective/train/advantage_avg": 0.005825016647577286, "objective/train/docs_used": 1294930, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.9270689487457275, "objective/train/original_loss": 2.9270682334899902, "objective/train/theoretical_loss": 3.3926933912161683, "objective/train/tokens_used": 646327776, "objective/train/value_avg": -0.022308349609375, "objective/train/value_loss": 0.0034466912038624287, "objective/train/value_max": -0.00033020973205566406, "objective/train/value_min": -0.8251953125, "objective/train/value_reward_corr": 0.5195216667067454, "objective/train/value_std": 0.03887939453125, "objective/train/weight_avg": 1.0005995035171509, "objective/train/weighted_lm_loss": 2.9295170307159424, "objective/train/weights_max": 1.0564210414886475, "objective/train/weights_min": 0.933692455291748, "theoretical_loss": 3.3926933912161683, "tokens_seen": 2287468544 }, { "epoch": 0.38, "learning_rate": 0.0006243129647591336, "loss": 1.3571, "theoretical_loss": 3.3926776077578658, "tokens_seen": 2287599616 }, { "epoch": 0.38, "learning_rate": 0.0006239896540575494, "loss": 1.3216, "theoretical_loss": 3.3926144854982803, "tokens_seen": 2288123904 }, { "epoch": 0.38, "learning_rate": 0.000623666343355965, "loss": 1.3006, "theoretical_loss": 3.3925513817492403, "tokens_seen": 2288648192 }, { "epoch": 0.38, "objective/train/advantage_avg": 0.00512238685041666, "objective/train/docs_used": 1295606, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.7535393238067627, "objective/train/original_loss": 2.7535393238067627, "objective/train/theoretical_loss": 3.3924961811456615, "objective/train/tokens_used": 647966176, "objective/train/value_avg": -0.01898193359375, "objective/train/value_loss": 0.0032209553755819798, "objective/train/value_max": -0.0008425712585449219, "objective/train/value_min": -0.9609375, "objective/train/value_reward_corr": 0.5364150270697338, "objective/train/value_std": 0.031890869140625, "objective/train/weight_avg": 1.00052809715271, "objective/train/weighted_lm_loss": 2.7551169395446777, "objective/train/weights_max": 1.0501353740692139, "objective/train/weights_min": 0.9089638590812683, "theoretical_loss": 3.3924961811456615, "tokens_seen": 2289106944 }, { "epoch": 0.38, "learning_rate": 0.0006233430326543809, "loss": 1.3179, "theoretical_loss": 3.392488296501078, "tokens_seen": 2289172480 }, { "epoch": 0.38, "learning_rate": 0.0006230197219527966, "loss": 1.3198, "theoretical_loss": 3.3924252297441346, "tokens_seen": 2289696768 }, { "epoch": 0.38, "learning_rate": 0.0006226964112512125, "loss": 1.3492, "theoretical_loss": 3.3923621814687577, "tokens_seen": 2290221056 }, { "epoch": 0.38, "objective/train/advantage_avg": 0.006192690692842007, "objective/train/docs_used": 1296233, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.8638389110565186, "objective/train/original_loss": 2.8638386726379395, "objective/train/theoretical_loss": 3.3922991516653025, "objective/train/tokens_used": 649604576, "objective/train/value_avg": -0.01422119140625, "objective/train/value_loss": 0.0016243819845840335, "objective/train/value_max": -0.0006718635559082031, "objective/train/value_min": -0.66259765625, "objective/train/value_reward_corr": 0.4207194012456092, "objective/train/value_std": 0.017486572265625, "objective/train/weight_avg": 1.0006272792816162, "objective/train/weighted_lm_loss": 2.8657350540161133, "objective/train/weights_max": 1.0284250974655151, "objective/train/weights_min": 0.9110687971115112, "theoretical_loss": 3.3922991516653025, "tokens_seen": 2290745344 }, { "epoch": 0.38, "learning_rate": 0.0006223731005496283, "loss": 1.3342, "theoretical_loss": 3.3922991516653025, "tokens_seen": 2290745344 }, { "epoch": 0.38, "learning_rate": 0.0006220497898480439, "loss": 1.3538, "theoretical_loss": 3.3922361403241306, "tokens_seen": 2291269632 }, { "epoch": 0.38, "learning_rate": 0.0006217264791464598, "loss": 1.3328, "theoretical_loss": 3.3921731474356123, "tokens_seen": 2291793920 }, { "epoch": 0.38, "learning_rate": 0.0006214031684448755, "loss": 1.3441, "theoretical_loss": 3.3921101729901237, "tokens_seen": 2292318208 }, { "epoch": 0.38, "objective/train/advantage_avg": 0.018535766750574112, "objective/train/docs_used": 1296233, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.8857479095458984, "objective/train/original_loss": 2.8857481479644775, "objective/train/theoretical_loss": 3.392102302480734, "objective/train/tokens_used": 651242976, "objective/train/value_avg": -0.031768798828125, "objective/train/value_loss": 0.003875288413837552, "objective/train/value_max": -0.0004305839538574219, "objective/train/value_min": -0.67578125, "objective/train/value_reward_corr": 0.28495314304457803, "objective/train/value_std": 0.046142578125, "objective/train/weight_avg": 1.0018727779388428, "objective/train/weighted_lm_loss": 2.8902947902679443, "objective/train/weights_max": 1.041114091873169, "objective/train/weights_min": 0.9147524237632751, "theoretical_loss": 3.392102302480734, "tokens_seen": 2292383744 }, { "epoch": 0.39, "learning_rate": 0.0006210798577432913, "loss": 1.3762, "theoretical_loss": 3.3920472169780487, "tokens_seen": 2292842496 }, { "epoch": 0.39, "learning_rate": 0.0006207565470417072, "loss": 1.3817, "theoretical_loss": 3.391984279389779, "tokens_seen": 2293366784 }, { "epoch": 0.39, "learning_rate": 0.0006204332363401228, "loss": 1.3746, "theoretical_loss": 3.3919213602157123, "tokens_seen": 2293891072 }, { "epoch": 0.39, "objective/train/advantage_avg": 0.004148440435528755, "objective/train/docs_used": 1296233, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.509946823120117, "objective/train/original_loss": 2.509946823120117, "objective/train/theoretical_loss": 3.3919056332982906, "objective/train/tokens_used": 652881376, "objective/train/value_avg": -0.0211639404296875, "objective/train/value_loss": 0.002791816368699074, "objective/train/value_max": -0.0006437301635742188, "objective/train/value_min": -0.61376953125, "objective/train/value_reward_corr": 0.42433529060598507, "objective/train/value_std": 0.0298309326171875, "objective/train/weight_avg": 1.0004286766052246, "objective/train/weighted_lm_loss": 2.511887788772583, "objective/train/weights_max": 1.0394788980484009, "objective/train/weights_min": 0.914010226726532, "theoretical_loss": 3.3919056332982906, "tokens_seen": 2294022144 }, { "epoch": 0.39, "learning_rate": 0.0006201099256385387, "loss": 1.4053, "theoretical_loss": 3.3918584594462544, "tokens_seen": 2294415360 }, { "epoch": 0.39, "learning_rate": 0.0006197866149369544, "loss": 1.4239, "theoretical_loss": 3.391795577071818, "tokens_seen": 2294939648 }, { "epoch": 0.39, "learning_rate": 0.0006194633042353702, "loss": 1.4239, "theoretical_loss": 3.3917327130828228, "tokens_seen": 2295463936 }, { "epoch": 0.39, "objective/train/advantage_avg": 0.010501906275749207, "objective/train/docs_used": 1296233, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.897561550140381, "objective/train/original_loss": 2.8975613117218018, "objective/train/theoretical_loss": 3.391709143824992, "objective/train/tokens_used": 654519776, "objective/train/value_avg": -0.019317626953125, "objective/train/value_loss": 0.0026487670838832855, "objective/train/value_max": -0.0008039474487304688, "objective/train/value_min": -0.509765625, "objective/train/value_reward_corr": 0.3848080948314054, "objective/train/value_std": 0.021148681640625, "objective/train/weight_avg": 1.0010632276535034, "objective/train/weighted_lm_loss": 2.9011385440826416, "objective/train/weights_max": 1.0319889783859253, "objective/train/weights_min": 0.9151230454444885, "theoretical_loss": 3.391709143824992, "tokens_seen": 2295660544 }, { "epoch": 0.39, "learning_rate": 0.000619139993533786, "loss": 1.4428, "theoretical_loss": 3.3916698674696963, "tokens_seen": 2295988224 }, { "epoch": 0.39, "learning_rate": 0.0006188166828322017, "loss": 1.4453, "theoretical_loss": 3.391607040222872, "tokens_seen": 2296512512 }, { "epoch": 0.39, "learning_rate": 0.0006184933721306176, "loss": 1.441, "theoretical_loss": 3.391544231332792, "tokens_seen": 2297036800 }, { "epoch": 0.39, "objective/train/advantage_avg": 0.008813338354229927, "objective/train/docs_used": 1296233, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 3.1597042083740234, "objective/train/original_loss": 3.1597039699554443, "objective/train/theoretical_loss": 3.3915128337685463, "objective/train/tokens_used": 656158176, "objective/train/value_avg": -0.0200347900390625, "objective/train/value_loss": 0.0019582933746278286, "objective/train/value_max": -0.0002892017364501953, "objective/train/value_min": -0.85693359375, "objective/train/value_reward_corr": 0.46322781429261994, "objective/train/value_std": 0.0243682861328125, "objective/train/weight_avg": 1.000891089439392, "objective/train/weighted_lm_loss": 3.1633737087249756, "objective/train/weights_max": 1.0375969409942627, "objective/train/weights_min": 0.9165194034576416, "theoretical_loss": 3.3915128337685463, "tokens_seen": 2297298944 }, { "epoch": 0.39, "learning_rate": 0.0006181700614290333, "loss": 1.424, "theoretical_loss": 3.3914814407899048, "tokens_seen": 2297561088 }, { "epoch": 0.39, "learning_rate": 0.0006178467507274491, "loss": 1.4482, "theoretical_loss": 3.3914186685846657, "tokens_seen": 2298085376 }, { "epoch": 0.39, "learning_rate": 0.000617523440025865, "loss": 1.4255, "theoretical_loss": 3.391355914707538, "tokens_seen": 2298609664 }, { "epoch": 0.39, "objective/train/advantage_avg": -0.009140116162598133, "objective/train/docs_used": 1296233, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.941311836242676, "objective/train/original_loss": 2.941311836242676, "objective/train/theoretical_loss": 3.3913167028373423, "objective/train/tokens_used": 657796576, "objective/train/value_avg": -0.0287933349609375, "objective/train/value_loss": 0.013372305780649185, "objective/train/value_max": -0.0008459091186523438, "objective/train/value_min": -0.92333984375, "objective/train/value_reward_corr": 0.668588437947435, "objective/train/value_std": 0.05194091796875, "objective/train/weight_avg": 0.9991516470909119, "objective/train/weighted_lm_loss": 2.939490795135498, "objective/train/weights_max": 1.0440901517868042, "objective/train/weights_min": 0.9132347106933594, "theoretical_loss": 3.3913167028373423, "tokens_seen": 2298937344 }, { "epoch": 0.39, "learning_rate": 0.0006172001293242806, "loss": 1.4299, "theoretical_loss": 3.3912931791489913, "tokens_seen": 2299133952 }, { "epoch": 0.39, "learning_rate": 0.0006168768186226964, "loss": 1.4734, "theoretical_loss": 3.391230461899503, "tokens_seen": 2299658240 }, { "epoch": 0.39, "learning_rate": 0.0006165535079211122, "loss": 1.4783, "theoretical_loss": 3.391167762949557, "tokens_seen": 2300182528 }, { "epoch": 0.39, "objective/train/advantage_avg": 0.005826769862323999, "objective/train/docs_used": 1296929, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.964369297027588, "objective/train/original_loss": 2.964369297027588, "objective/train/theoretical_loss": 3.391120750740452, "objective/train/tokens_used": 659434976, "objective/train/value_avg": -0.01508331298828125, "objective/train/value_loss": 0.002077616285532713, "objective/train/value_max": -0.0004029273986816406, "objective/train/value_min": -0.44775390625, "objective/train/value_reward_corr": 0.25602769908524664, "objective/train/value_std": 0.01519012451171875, "objective/train/weight_avg": 1.000592827796936, "objective/train/weighted_lm_loss": 2.9661333560943604, "objective/train/weights_max": 1.0285568237304688, "objective/train/weights_min": 0.9297000765800476, "theoretical_loss": 3.391120750740452, "tokens_seen": 2300575744 }, { "epoch": 0.39, "learning_rate": 0.000616230197219528, "loss": 1.4755, "theoretical_loss": 3.391105082289645, "tokens_seen": 2300706816 }, { "epoch": 0.39, "learning_rate": 0.0006159068865179438, "loss": 1.4506, "theoretical_loss": 3.391042419910265, "tokens_seen": 2301231104 }, { "epoch": 0.39, "learning_rate": 0.0006155835758163595, "loss": 1.4766, "theoretical_loss": 3.390979775801923, "tokens_seen": 2301755392 }, { "epoch": 0.39, "objective/train/advantage_avg": 0.00853275042027235, "objective/train/docs_used": 1297386, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.171222448348999, "objective/train/original_loss": 2.17122220993042, "objective/train/theoretical_loss": 3.390924977187626, "objective/train/tokens_used": 661073376, "objective/train/value_avg": -0.0313720703125, "objective/train/value_loss": 0.008092422038316727, "objective/train/value_max": -0.0003669261932373047, "objective/train/value_min": -0.99853515625, "objective/train/value_reward_corr": 0.6724567842334379, "objective/train/value_std": 0.095703125, "objective/train/weight_avg": 1.000893473625183, "objective/train/weighted_lm_loss": 2.1729531288146973, "objective/train/weights_max": 1.0958987474441528, "objective/train/weights_min": 0.908396303653717, "theoretical_loss": 3.390924977187626, "tokens_seen": 2302214144 }, { "epoch": 0.39, "learning_rate": 0.0006152602651147753, "loss": 1.4543, "theoretical_loss": 3.390917149955131, "tokens_seen": 2302279680 }, { "epoch": 0.39, "learning_rate": 0.0006149369544131911, "loss": 1.452, "theoretical_loss": 3.39085454236041, "tokens_seen": 2302803968 }, { "epoch": 0.39, "learning_rate": 0.0006146136437116069, "loss": 1.4576, "theoretical_loss": 3.3907919530082853, "tokens_seen": 2303328256 }, { "epoch": 0.39, "objective/train/advantage_avg": 0.006353702396154404, "objective/train/docs_used": 1298736, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.869968891143799, "objective/train/original_loss": 2.869969129562378, "objective/train/theoretical_loss": 3.3907293818892916, "objective/train/tokens_used": 662711776, "objective/train/value_avg": -0.02227783203125, "objective/train/value_loss": 0.0027563530020415783, "objective/train/value_max": -0.0005440711975097656, "objective/train/value_min": -0.9033203125, "objective/train/value_reward_corr": 0.6189611618702772, "objective/train/value_std": 0.043426513671875, "objective/train/weight_avg": 1.000649094581604, "objective/train/weighted_lm_loss": 2.871983051300049, "objective/train/weights_max": 1.0457144975662231, "objective/train/weights_min": 0.9286783337593079, "theoretical_loss": 3.3907293818892916, "tokens_seen": 2303852544 }, { "epoch": 0.39, "learning_rate": 0.0006142903330100226, "loss": 1.4546, "theoretical_loss": 3.3907293818892916, "tokens_seen": 2303852544 }, { "epoch": 0.39, "learning_rate": 0.0006139670223084384, "loss": 1.4325, "theoretical_loss": 3.39066682899397, "tokens_seen": 2304376832 }, { "epoch": 0.39, "learning_rate": 0.0006136437116068542, "loss": 1.4178, "theoretical_loss": 3.390604294312868, "tokens_seen": 2304901120 }, { "epoch": 0.39, "learning_rate": 0.00061332040090527, "loss": 1.389, "theoretical_loss": 3.3905417778365408, "tokens_seen": 2305425408 }, { "epoch": 0.39, "objective/train/advantage_avg": -0.0040808627381920815, "objective/train/docs_used": 1299212, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.902719020843506, "objective/train/original_loss": 2.9027187824249268, "objective/train/theoretical_loss": 3.390533964556553, "objective/train/tokens_used": 664350176, "objective/train/value_avg": -0.0260162353515625, "objective/train/value_loss": 0.011381421238183975, "objective/train/value_max": -0.0004372596740722656, "objective/train/value_min": -0.94580078125, "objective/train/value_reward_corr": 0.5524923786925632, "objective/train/value_std": 0.045196533203125, "objective/train/weight_avg": 0.9996477365493774, "objective/train/weighted_lm_loss": 2.9027199745178223, "objective/train/weights_max": 1.0574829578399658, "objective/train/weights_min": 0.9125813841819763, "theoretical_loss": 3.390533964556553, "tokens_seen": 2305490944 }, { "epoch": 0.39, "learning_rate": 0.0006129970902036858, "loss": 1.3717, "theoretical_loss": 3.3904792795555503, "tokens_seen": 2305949696 }, { "epoch": 0.39, "learning_rate": 0.0006126737795021015, "loss": 1.4443, "theoretical_loss": 3.390416799460466, "tokens_seen": 2306473984 }, { "epoch": 0.39, "learning_rate": 0.0006123504688005173, "loss": 1.4258, "theoretical_loss": 3.3903543375418637, "tokens_seen": 2306998272 }, { "epoch": 0.39, "objective/train/advantage_avg": -0.003170827403664589, "objective/train/docs_used": 1299979, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.834336042404175, "objective/train/original_loss": 2.8343358039855957, "objective/train/theoretical_loss": 3.390338724901185, "objective/train/tokens_used": 665988576, "objective/train/value_avg": -0.021881103515625, "objective/train/value_loss": 0.010097242891788483, "objective/train/value_max": -0.0008625984191894531, "objective/train/value_min": -0.962890625, "objective/train/value_reward_corr": 0.44907459702430225, "objective/train/value_std": 0.042266845703125, "objective/train/weight_avg": 0.999732494354248, "objective/train/weighted_lm_loss": 2.8333940505981445, "objective/train/weights_max": 1.0962185859680176, "objective/train/weights_min": 0.9082434773445129, "theoretical_loss": 3.390338724901185, "tokens_seen": 2307129344 }, { "epoch": 0.39, "learning_rate": 0.000612027158098933, "loss": 1.4259, "theoretical_loss": 3.390291893790327, "tokens_seen": 2307522560 }, { "epoch": 0.39, "learning_rate": 0.0006117038473973488, "loss": 1.4002, "theoretical_loss": 3.390229468196446, "tokens_seen": 2308046848 }, { "epoch": 0.39, "learning_rate": 0.0006113805366957647, "loss": 1.4028, "theoretical_loss": 3.3901670607508176, "tokens_seen": 2308571136 }, { "epoch": 0.39, "objective/train/advantage_avg": 0.0012533609988167882, "objective/train/docs_used": 1301383, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.878927707672119, "objective/train/original_loss": 2.878927707672119, "objective/train/theoretical_loss": 3.3901436626356354, "objective/train/tokens_used": 667626976, "objective/train/value_avg": -0.0206756591796875, "objective/train/value_loss": 0.0053587304428219795, "objective/train/value_max": -0.0006747245788574219, "objective/train/value_min": -0.97607421875, "objective/train/value_reward_corr": 0.6068027196629101, "objective/train/value_std": 0.051849365234375, "objective/train/weight_avg": 1.000151515007019, "objective/train/weighted_lm_loss": 2.879830837249756, "objective/train/weights_max": 1.0465197563171387, "objective/train/weights_min": 0.9088003039360046, "theoretical_loss": 3.3901436626356354, "tokens_seen": 2308767744 }, { "epoch": 0.4, "learning_rate": 0.0006110572259941804, "loss": 1.4045, "theoretical_loss": 3.390104671444046, "tokens_seen": 2309095424 }, { "epoch": 0.4, "learning_rate": 0.0006107339152925962, "loss": 1.4285, "theoretical_loss": 3.3900423002667432, "tokens_seen": 2309619712 }, { "epoch": 0.4, "learning_rate": 0.0006104106045910119, "loss": 1.4063, "theoretical_loss": 3.3899799472095267, "tokens_seen": 2310144000 }, { "epoch": 0.4, "objective/train/advantage_avg": 0.0018447662005200982, "objective/train/docs_used": 1301708, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.8854734897613525, "objective/train/original_loss": 2.8854732513427734, "objective/train/theoretical_loss": 3.3899487774730206, "objective/train/tokens_used": 669265376, "objective/train/value_avg": -0.0197296142578125, "objective/train/value_loss": 0.004929563961923122, "objective/train/value_max": -0.0008459091186523438, "objective/train/value_min": -0.97509765625, "objective/train/value_reward_corr": 0.3828174209340456, "objective/train/value_std": 0.028594970703125, "objective/train/weight_avg": 1.0002087354660034, "objective/train/weighted_lm_loss": 2.8868143558502197, "objective/train/weights_max": 1.0800594091415405, "objective/train/weights_min": 0.9091506004333496, "theoretical_loss": 3.3899487774730206, "tokens_seen": 2310406144 }, { "epoch": 0.4, "learning_rate": 0.0006100872938894277, "loss": 1.4314, "theoretical_loss": 3.3899176122630217, "tokens_seen": 2310668288 }, { "epoch": 0.4, "learning_rate": 0.0006097639831878436, "loss": 1.4196, "theoretical_loss": 3.3898552954178602, "tokens_seen": 2311192576 }, { "epoch": 0.4, "learning_rate": 0.0006094406724862593, "loss": 1.4003, "theoretical_loss": 3.3897929966646814, "tokens_seen": 2311716864 }, { "epoch": 0.4, "objective/train/advantage_avg": 0.0010243852157145739, "objective/train/docs_used": 1302992, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.883061647415161, "objective/train/original_loss": 2.883060932159424, "objective/train/theoretical_loss": 3.3897540691271235, "objective/train/tokens_used": 670903776, "objective/train/value_avg": -0.01904296875, "objective/train/value_loss": 0.0058153606951236725, "objective/train/value_max": -0.0005974769592285156, "objective/train/value_min": -0.96875, "objective/train/value_reward_corr": 0.5295660121815561, "objective/train/value_std": 0.029052734375, "objective/train/weight_avg": 1.0001310110092163, "objective/train/weighted_lm_loss": 2.8844153881073, "objective/train/weights_max": 1.0560601949691772, "objective/train/weights_min": 0.9100872874259949, "theoretical_loss": 3.3897540691271235, "tokens_seen": 2312044544 }, { "epoch": 0.4, "learning_rate": 0.0006091173617846752, "loss": 1.4139, "theoretical_loss": 3.389730715994132, "tokens_seen": 2312241152 }, { "epoch": 0.4, "learning_rate": 0.0006087940510830908, "loss": 1.4009, "theoretical_loss": 3.3896684533968644, "tokens_seen": 2312765440 }, { "epoch": 0.4, "learning_rate": 0.0006084707403815066, "loss": 1.3855, "theoretical_loss": 3.389606208863539, "tokens_seen": 2313289728 }, { "epoch": 0.4, "objective/train/advantage_avg": 0.010873072780668736, "objective/train/docs_used": 1303654, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.736301898956299, "objective/train/original_loss": 2.736301898956299, "objective/train/theoretical_loss": 3.389559537312392, "objective/train/tokens_used": 672542176, "objective/train/value_avg": -0.015777587890625, "objective/train/value_loss": 0.001108345459215343, "objective/train/value_max": -0.0006384849548339844, "objective/train/value_min": -0.3671875, "objective/train/value_reward_corr": 0.17948855264075853, "objective/train/value_std": 0.01751708984375, "objective/train/weight_avg": 1.001092791557312, "objective/train/weighted_lm_loss": 2.739657402038574, "objective/train/weights_max": 1.0243074893951416, "objective/train/weights_min": 0.9119157195091248, "theoretical_loss": 3.389559537312392, "tokens_seen": 2313682944 }, { "epoch": 0.4, "learning_rate": 0.0006081474296799225, "loss": 1.4222, "theoretical_loss": 3.3895439823848226, "tokens_seen": 2313814016 }, { "epoch": 0.4, "learning_rate": 0.0006078241189783382, "loss": 1.3815, "theoretical_loss": 3.3894817739513887, "tokens_seen": 2314338304 }, { "epoch": 0.4, "learning_rate": 0.000607500808276754, "loss": 1.3708, "theoretical_loss": 3.3894195835539187, "tokens_seen": 2314862592 }, { "epoch": 0.4, "objective/train/advantage_avg": 0.005411440972238779, "objective/train/docs_used": 1304892, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.7610666751861572, "objective/train/original_loss": 2.761066198348999, "objective/train/theoretical_loss": 3.389365181743938, "objective/train/tokens_used": 674180576, "objective/train/value_avg": -0.0162200927734375, "objective/train/value_loss": 0.002056594006717205, "objective/train/value_max": -0.0003814697265625, "objective/train/value_min": -0.75390625, "objective/train/value_reward_corr": 0.41351263734419275, "objective/train/value_std": 0.022247314453125, "objective/train/weight_avg": 1.0005513429641724, "objective/train/weighted_lm_loss": 2.7631125450134277, "objective/train/weights_max": 1.040544033050537, "objective/train/weights_min": 0.9202457666397095, "theoretical_loss": 3.389365181743938, "tokens_seen": 2315321344 }, { "epoch": 0.4, "learning_rate": 0.0006071774975751697, "loss": 1.375, "theoretical_loss": 3.3893574111830995, "tokens_seen": 2315386880 }, { "epoch": 0.4, "learning_rate": 0.0006068541868735855, "loss": 1.3645, "theoretical_loss": 3.3892952568296266, "tokens_seen": 2315911168 }, { "epoch": 0.4, "learning_rate": 0.0006065308761720014, "loss": 1.3759, "theoretical_loss": 3.3892331204842012, "tokens_seen": 2316435456 }, { "debugging/Self-BLEU-5": 0.3784563223879985, "debugging/distinct-1-grams": 0.7849709164106647, "debugging/distinct-2-grams": 0.970948355640835, "debugging/entropy-1-grams": 5.63270196596545, "debugging/entropy-2-grams": 6.2855658785747845, "debugging/length": 527.5714285714286, "debugging/num_segments": 7, "debugging/raw_token_scores_avg": 0.01023189164698124, "debugging/raw_token_scores_std": 0.04704361408948898, "epoch": 0.4, "objective/train/advantage_avg": 0.00922432728111744, "objective/train/docs_used": 1305637, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 3.097170352935791, "objective/train/original_loss": 3.097170829772949, "objective/train/theoretical_loss": 3.389171002137532, "objective/train/tokens_used": 675818976, "objective/train/value_avg": -0.019439697265625, "objective/train/value_loss": 0.0022370691876858473, "objective/train/value_max": -0.0007238388061523438, "objective/train/value_min": -0.7939453125, "objective/train/value_reward_corr": 0.28574698925620295, "objective/train/value_std": 0.024505615234375, "objective/train/weight_avg": 1.0009335279464722, "objective/train/weighted_lm_loss": 3.100485324859619, "objective/train/weights_max": 1.0601301193237305, "objective/train/weights_min": 0.9290785193443298, "theoretical_loss": 3.389171002137532, "tokens_seen": 2316959744 }, { "epoch": 0.4, "learning_rate": 0.0006062075654704171, "loss": 1.3619, "theoretical_loss": 3.389171002137532, "tokens_seen": 2316959744 }, { "epoch": 0.4, "learning_rate": 0.0006058842547688329, "loss": 1.3666, "theoretical_loss": 3.3891089017803337, "tokens_seen": 2317484032 }, { "epoch": 0.4, "learning_rate": 0.0006055609440672486, "loss": 1.3349, "theoretical_loss": 3.389046819403329, "tokens_seen": 2318008320 }, { "epoch": 0.4, "learning_rate": 0.0006052376333656644, "loss": 1.342, "theoretical_loss": 3.388984754997247, "tokens_seen": 2318532608 }, { "epoch": 0.4, "objective/train/advantage_avg": 0.009325896389782429, "objective/train/docs_used": 1307149, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.781954288482666, "objective/train/original_loss": 2.781954288482666, "objective/train/theoretical_loss": 3.388976998209606, "objective/train/tokens_used": 677457376, "objective/train/value_avg": -0.0158843994140625, "objective/train/value_loss": 0.0009684950928203762, "objective/train/value_max": -0.0006694793701171875, "objective/train/value_min": -0.810546875, "objective/train/value_reward_corr": 0.3506803139548515, "objective/train/value_std": 0.0208587646484375, "objective/train/weight_avg": 1.0009374618530273, "objective/train/weighted_lm_loss": 2.7851850986480713, "objective/train/weights_max": 1.0456058979034424, "objective/train/weights_min": 0.9647775292396545, "theoretical_loss": 3.388976998209606, "tokens_seen": 2318598144 }, { "epoch": 0.4, "learning_rate": 0.0006049143226640801, "loss": 1.3488, "theoretical_loss": 3.388922708552824, "tokens_seen": 2319056896 }, { "epoch": 0.4, "learning_rate": 0.000604591011962496, "loss": 1.3522, "theoretical_loss": 3.388860680060802, "tokens_seen": 2319581184 }, { "epoch": 0.4, "learning_rate": 0.0006042677012609118, "loss": 1.3671, "theoretical_loss": 3.3887986695119316, "tokens_seen": 2320105472 }, { "epoch": 0.4, "objective/train/advantage_avg": 0.006009617354720831, "objective/train/docs_used": 1307847, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 3.041110038757324, "objective/train/original_loss": 3.041110038757324, "objective/train/theoretical_loss": 3.388783169677248, "objective/train/tokens_used": 679095776, "objective/train/value_avg": -0.0148468017578125, "objective/train/value_loss": 0.0021474456880241632, "objective/train/value_max": -0.0004124641418457031, "objective/train/value_min": -0.66064453125, "objective/train/value_reward_corr": 0.23724010558650502, "objective/train/value_std": 0.0175628662109375, "objective/train/weight_avg": 1.0006115436553955, "objective/train/weighted_lm_loss": 3.0434036254882812, "objective/train/weights_max": 1.065212368965149, "objective/train/weights_min": 0.9158071279525757, "theoretical_loss": 3.388783169677248, "tokens_seen": 2320236544 }, { "epoch": 0.4, "learning_rate": 0.0006039443905593275, "loss": 1.3355, "theoretical_loss": 3.3887366768969684, "tokens_seen": 2320629760 }, { "epoch": 0.4, "learning_rate": 0.0006036210798577433, "loss": 1.3732, "theoretical_loss": 3.3886747022066768, "tokens_seen": 2321154048 }, { "epoch": 0.4, "learning_rate": 0.000603297769156159, "loss": 1.3475, "theoretical_loss": 3.3886127454318267, "tokens_seen": 2321678336 }, { "epoch": 0.4, "objective/train/advantage_avg": 0.012630046345293522, "objective/train/docs_used": 1309222, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.63545560836792, "objective/train/original_loss": 2.635455369949341, "objective/train/theoretical_loss": 3.3885895162582003, "objective/train/tokens_used": 680734176, "objective/train/value_avg": -0.0216217041015625, "objective/train/value_loss": 0.002171108964830637, "objective/train/value_max": -0.00044417381286621094, "objective/train/value_min": -0.9658203125, "objective/train/value_reward_corr": 0.6172681515327447, "objective/train/value_std": 0.03753662109375, "objective/train/weight_avg": 1.0012736320495605, "objective/train/weighted_lm_loss": 2.6395363807678223, "objective/train/weights_max": 1.034324288368225, "objective/train/weights_min": 0.9173678755760193, "theoretical_loss": 3.3885895162582003, "tokens_seen": 2321874944 }, { "epoch": 0.4, "learning_rate": 0.0006029744584545749, "loss": 1.2901, "theoretical_loss": 3.3885508065631953, "tokens_seen": 2322202624 }, { "epoch": 0.4, "learning_rate": 0.0006026511477529907, "loss": 1.3852, "theoretical_loss": 3.388488885591566, "tokens_seen": 2322726912 }, { "epoch": 0.4, "learning_rate": 0.0006023278370514063, "loss": 1.3379, "theoretical_loss": 3.3884269825077302, "tokens_seen": 2323251200 }, { "epoch": 0.4, "objective/train/advantage_avg": 0.010763294994831085, "objective/train/docs_used": 1309762, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.8907158374786377, "objective/train/original_loss": 2.890716075897217, "objective/train/theoretical_loss": 3.3883960376708586, "objective/train/tokens_used": 682372576, "objective/train/value_avg": -0.016204833984375, "objective/train/value_loss": 0.0016031955601647496, "objective/train/value_max": -0.0007824897766113281, "objective/train/value_min": -0.66552734375, "objective/train/value_reward_corr": 0.19363905107613172, "objective/train/value_std": 0.0190582275390625, "objective/train/weight_avg": 1.0010842084884644, "objective/train/weighted_lm_loss": 2.8944802284240723, "objective/train/weights_max": 1.0646263360977173, "objective/train/weights_min": 0.9266166090965271, "theoretical_loss": 3.3883960376708586, "tokens_seen": 2323513344 }, { "epoch": 0.4, "learning_rate": 0.0006020045263498222, "loss": 1.3236, "theoretical_loss": 3.388365097302485, "tokens_seen": 2323775488 }, { "epoch": 0.4, "learning_rate": 0.0006016812156482379, "loss": 1.3536, "theoretical_loss": 3.388303229966635, "tokens_seen": 2324299776 }, { "epoch": 0.4, "learning_rate": 0.0006013579049466538, "loss": 1.3148, "theoretical_loss": 3.388241380490992, "tokens_seen": 2324824064 }, { "epoch": 0.4, "objective/train/advantage_avg": 5.4690466640749946e-05, "objective/train/docs_used": 1311230, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.4269635677337646, "objective/train/original_loss": 2.4269630908966064, "objective/train/theoretical_loss": 3.3882027336342704, "objective/train/tokens_used": 684010976, "objective/train/value_avg": -0.018585205078125, "objective/train/value_loss": 0.0011452658800408244, "objective/train/value_max": -0.000469207763671875, "objective/train/value_min": -0.403564453125, "objective/train/value_reward_corr": 0.5082777797100462, "objective/train/value_std": 0.01983642578125, "objective/train/weight_avg": 1.0000112056732178, "objective/train/weighted_lm_loss": 2.4285614490509033, "objective/train/weights_max": 1.0279436111450195, "objective/train/weights_min": 0.9383065104484558, "theoretical_loss": 3.3882027336342704, "tokens_seen": 2325151744 }, { "epoch": 0.41, "learning_rate": 0.0006010345942450696, "loss": 1.3394, "theoretical_loss": 3.388179548866373, "tokens_seen": 2325348352 }, { "epoch": 0.41, "learning_rate": 0.0006007112835434852, "loss": 1.3278, "theoretical_loss": 3.3881177350836027, "tokens_seen": 2325872640 }, { "epoch": 0.41, "learning_rate": 0.0006003879728419011, "loss": 1.3381, "theoretical_loss": 3.388055939133513, "tokens_seen": 2326396928 }, { "epoch": 0.41, "objective/train/advantage_avg": 0.005769002716988325, "objective/train/docs_used": 1311908, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.7540321350097656, "objective/train/original_loss": 2.7540323734283447, "objective/train/theoretical_loss": 3.3880096038681313, "objective/train/tokens_used": 685649376, "objective/train/value_avg": -0.024261474609375, "objective/train/value_loss": 0.00386273255571723, "objective/train/value_max": -0.0004916191101074219, "objective/train/value_min": -0.96875, "objective/train/value_reward_corr": 0.7167252933986944, "objective/train/value_std": 0.063720703125, "objective/train/weight_avg": 1.000596046447754, "objective/train/weighted_lm_loss": 2.756448984146118, "objective/train/weights_max": 1.0883804559707642, "objective/train/weights_min": 0.9164791107177734, "theoretical_loss": 3.3880096038681313, "tokens_seen": 2326790144 }, { "epoch": 0.41, "learning_rate": 0.0006000646621403168, "loss": 1.3022, "theoretical_loss": 3.387994161006943, "tokens_seen": 2326921216 }, { "epoch": 0.41, "learning_rate": 0.0005997413514387327, "loss": 1.2956, "theoretical_loss": 3.3879324006947367, "tokens_seen": 2327445504 }, { "epoch": 0.41, "learning_rate": 0.0005994180407371485, "loss": 1.3371, "theoretical_loss": 3.3878706581877465, "tokens_seen": 2327969792 }, { "epoch": 0.41, "objective/train/advantage_avg": 0.01353497989475727, "objective/train/docs_used": 1312576, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.3028969764709473, "objective/train/original_loss": 2.302896738052368, "objective/train/theoretical_loss": 3.3878166480927843, "objective/train/tokens_used": 687287776, "objective/train/value_avg": -0.04827880859375, "objective/train/value_loss": 0.0021474880632013083, "objective/train/value_max": -0.0002913475036621094, "objective/train/value_min": -0.9921875, "objective/train/value_reward_corr": 0.9674191905040781, "objective/train/value_std": 0.145751953125, "objective/train/weight_avg": 1.0013643503189087, "objective/train/weighted_lm_loss": 2.305866003036499, "objective/train/weights_max": 1.0522360801696777, "objective/train/weights_min": 0.9553333520889282, "theoretical_loss": 3.3878166480927843, "tokens_seen": 2328428544 }, { "epoch": 0.41, "learning_rate": 0.0005990947300355641, "loss": 1.2899, "theoretical_loss": 3.3878089334768307, "tokens_seen": 2328494080 }, { "epoch": 0.41, "learning_rate": 0.00059877141933398, "loss": 1.2831, "theoretical_loss": 3.3877472265528548, "tokens_seen": 2329018368 }, { "epoch": 0.41, "learning_rate": 0.0005984481086323957, "loss": 1.3221, "theoretical_loss": 3.3876855374066905, "tokens_seen": 2329542656 }, { "epoch": 0.41, "objective/train/advantage_avg": 0.007597735151648521, "objective/train/docs_used": 1313793, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.3760271072387695, "objective/train/original_loss": 2.3760266304016113, "objective/train/theoretical_loss": 3.3876238660292177, "objective/train/tokens_used": 688926176, "objective/train/value_avg": -0.014923095703125, "objective/train/value_loss": 0.0008382010855711997, "objective/train/value_max": -0.0005526542663574219, "objective/train/value_min": -0.5673828125, "objective/train/value_reward_corr": 0.5103546604740474, "objective/train/value_std": 0.023162841796875, "objective/train/weight_avg": 1.0007638931274414, "objective/train/weighted_lm_loss": 2.3782427310943604, "objective/train/weights_max": 1.04811429977417, "objective/train/weights_min": 0.93355393409729, "theoretical_loss": 3.3876238660292177, "tokens_seen": 2330066944 }, { "epoch": 0.41, "learning_rate": 0.0005981247979308115, "loss": 1.3163, "theoretical_loss": 3.3876238660292177, "tokens_seen": 2330066944 }, { "epoch": 0.41, "learning_rate": 0.0005978014872292274, "loss": 1.3663, "theoretical_loss": 3.387562212411321, "tokens_seen": 2330591232 }, { "epoch": 0.41, "learning_rate": 0.000597478176527643, "loss": 1.3648, "theoretical_loss": 3.3875005765438932, "tokens_seen": 2331115520 }, { "epoch": 0.41, "learning_rate": 0.0005971548658260589, "loss": 1.3521, "theoretical_loss": 3.387438958417833, "tokens_seen": 2331639808 }, { "epoch": 0.41, "objective/train/advantage_avg": 0.0052346824668347836, "objective/train/docs_used": 1314556, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 3.0033488273620605, "objective/train/original_loss": 3.0033485889434814, "objective/train/theoretical_loss": 3.3874312573990624, "objective/train/tokens_used": 690564576, "objective/train/value_avg": -0.015899658203125, "objective/train/value_loss": 0.002307373099029064, "objective/train/value_max": -0.0003936290740966797, "objective/train/value_min": -0.43408203125, "objective/train/value_reward_corr": 0.3085833199922837, "objective/train/value_std": 0.0196685791015625, "objective/train/weight_avg": 1.0005348920822144, "objective/train/weighted_lm_loss": 3.0053563117980957, "objective/train/weights_max": 1.0333309173583984, "objective/train/weights_min": 0.9349931478500366, "theoretical_loss": 3.3874312573990624, "tokens_seen": 2331705344 }, { "epoch": 0.41, "learning_rate": 0.0005968315551244746, "loss": 1.3689, "theoretical_loss": 3.3873773580240467, "tokens_seen": 2332164096 }, { "epoch": 0.41, "learning_rate": 0.0005965082444228904, "loss": 1.3338, "theoretical_loss": 3.3873157753534455, "tokens_seen": 2332688384 }, { "epoch": 0.41, "learning_rate": 0.0005961849337213063, "loss": 1.2831, "theoretical_loss": 3.38725421039695, "tokens_seen": 2333212672 }, { "epoch": 0.41, "objective/train/advantage_avg": 0.0035420567728579044, "objective/train/docs_used": 1315828, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.574138641357422, "objective/train/original_loss": 2.5741384029388428, "objective/train/theoretical_loss": 3.3872388219245915, "objective/train/tokens_used": 692202976, "objective/train/value_avg": -0.01532745361328125, "objective/train/value_loss": 0.0023467305582016706, "objective/train/value_max": -0.0005679130554199219, "objective/train/value_min": -0.60693359375, "objective/train/value_reward_corr": 0.3956331157093077, "objective/train/value_std": 0.023956298828125, "objective/train/weight_avg": 1.0003657341003418, "objective/train/weighted_lm_loss": 2.57560396194458, "objective/train/weights_max": 1.0500633716583252, "objective/train/weights_min": 0.9312633872032166, "theoretical_loss": 3.3872388219245915, "tokens_seen": 2333343744 }, { "epoch": 0.41, "learning_rate": 0.0005958616230197219, "loss": 1.3288, "theoretical_loss": 3.3871926631454854, "tokens_seen": 2333736960 }, { "epoch": 0.41, "learning_rate": 0.0005955383123181377, "loss": 1.3042, "theoretical_loss": 3.387131133589984, "tokens_seen": 2334261248 }, { "epoch": 0.41, "learning_rate": 0.0005952150016165535, "loss": 1.3349, "theoretical_loss": 3.3870696217213854, "tokens_seen": 2334785536 }, { "epoch": 0.41, "objective/train/advantage_avg": 0.011136913672089577, "objective/train/docs_used": 1316418, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.3145954608917236, "objective/train/original_loss": 2.3145952224731445, "objective/train/theoretical_loss": 3.387046559328717, "objective/train/tokens_used": 693841376, "objective/train/value_avg": -0.0268096923828125, "objective/train/value_loss": 0.003653236897662282, "objective/train/value_max": -0.0009737014770507812, "objective/train/value_min": -0.97021484375, "objective/train/value_reward_corr": 0.7307457937604849, "objective/train/value_std": 0.061553955078125, "objective/train/weight_avg": 1.0011316537857056, "objective/train/weighted_lm_loss": 2.318331241607666, "objective/train/weights_max": 1.0584571361541748, "objective/train/weights_min": 0.9101923108100891, "theoretical_loss": 3.387046559328717, "tokens_seen": 2334982144 }, { "epoch": 0.41, "learning_rate": 0.0005948916909149693, "loss": 1.2961, "theoretical_loss": 3.3870081275306347, "tokens_seen": 2335309824 }, { "epoch": 0.41, "learning_rate": 0.0005945683802133852, "loss": 1.2968, "theoretical_loss": 3.386946651008685, "tokens_seen": 2335834112 }, { "epoch": 0.41, "learning_rate": 0.0005942450695118009, "loss": 1.2546, "theoretical_loss": 3.3868851921464964, "tokens_seen": 2336358400 }, { "epoch": 0.41, "objective/train/advantage_avg": 0.01148162316530943, "objective/train/docs_used": 1317715, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.65732741355896, "objective/train/original_loss": 2.657327651977539, "objective/train/theoretical_loss": 3.3868544693349882, "objective/train/tokens_used": 695479776, "objective/train/value_avg": -0.03411865234375, "objective/train/value_loss": 0.0035432525910437107, "objective/train/value_max": -0.000614166259765625, "objective/train/value_min": -0.90673828125, "objective/train/value_reward_corr": 0.6877587407166935, "objective/train/value_std": 0.07135009765625, "objective/train/weight_avg": 1.0011658668518066, "objective/train/weighted_lm_loss": 2.6613686084747314, "objective/train/weights_max": 1.0702282190322876, "objective/train/weights_min": 0.9160781502723694, "theoretical_loss": 3.3868544693349882, "tokens_seen": 2336620544 }, { "epoch": 0.41, "learning_rate": 0.0005939217588102166, "loss": 1.2892, "theoretical_loss": 3.386823750935033, "tokens_seen": 2336882688 }, { "epoch": 0.41, "learning_rate": 0.0005935984481086324, "loss": 1.3359, "theoretical_loss": 3.3867623273652683, "tokens_seen": 2337406976 }, { "epoch": 0.41, "learning_rate": 0.0005932751374070482, "loss": 1.3023, "theoretical_loss": 3.3867009214281807, "tokens_seen": 2337931264 }, { "epoch": 0.41, "objective/train/advantage_avg": 0.005643374752253294, "objective/train/docs_used": 1318542, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.7846899032592773, "objective/train/original_loss": 2.7846903800964355, "objective/train/theoretical_loss": 3.3866625516675897, "objective/train/tokens_used": 697118176, "objective/train/value_avg": -0.0181427001953125, "objective/train/value_loss": 0.0031875078566372395, "objective/train/value_max": -0.00013339519500732422, "objective/train/value_min": -0.85986328125, "objective/train/value_reward_corr": 0.5056996511367979, "objective/train/value_std": 0.042755126953125, "objective/train/weight_avg": 1.000580072402954, "objective/train/weighted_lm_loss": 2.7870752811431885, "objective/train/weights_max": 1.0747584104537964, "objective/train/weights_min": 0.916345477104187, "theoretical_loss": 3.3866625516675897, "tokens_seen": 2338258944 }, { "epoch": 0.41, "learning_rate": 0.0005929518267054639, "loss": 1.3003, "theoretical_loss": 3.386639533114757, "tokens_seen": 2338455552 }, { "epoch": 0.41, "learning_rate": 0.0005926285160038798, "loss": 1.2864, "theoretical_loss": 3.3865781624159887, "tokens_seen": 2338979840 }, { "epoch": 0.41, "learning_rate": 0.0005923052053022955, "loss": 1.3386, "theoretical_loss": 3.3865168093228757, "tokens_seen": 2339504128 }, { "epoch": 0.41, "objective/train/advantage_avg": -0.006336298771202564, "objective/train/docs_used": 1319951, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.111819267272949, "objective/train/original_loss": 2.111819267272949, "objective/train/theoretical_loss": 3.3864708060513404, "objective/train/tokens_used": 698756576, "objective/train/value_avg": -0.0215911865234375, "objective/train/value_loss": 0.005582404788583517, "objective/train/value_max": -0.0005154609680175781, "objective/train/value_min": -0.900390625, "objective/train/value_reward_corr": 0.6618862157017645, "objective/train/value_std": 0.03790283203125, "objective/train/weight_avg": 0.9993939399719238, "objective/train/weighted_lm_loss": 2.1101250648498535, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.9294102191925049, "theoretical_loss": 3.3864708060513404, "tokens_seen": 2339897344 }, { "epoch": 0.41, "learning_rate": 0.0005919818946007113, "loss": 1.3254, "theoretical_loss": 3.3864554738264228, "tokens_seen": 2340028416 }, { "epoch": 0.41, "learning_rate": 0.0005916585838991271, "loss": 1.3026, "theoretical_loss": 3.3863941559176425, "tokens_seen": 2340552704 }, { "epoch": 0.41, "learning_rate": 0.0005913352731975428, "loss": 1.2882, "theoretical_loss": 3.3863328555875536, "tokens_seen": 2341076992 }, { "epoch": 0.41, "objective/train/advantage_avg": 0.006050213240087032, "objective/train/docs_used": 1320683, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.780933141708374, "objective/train/original_loss": 2.780932903289795, "objective/train/theoretical_loss": 3.386279232211691, "objective/train/tokens_used": 700394976, "objective/train/value_avg": -0.01280975341796875, "objective/train/value_loss": 0.0012926902854815125, "objective/train/value_max": -0.00017952919006347656, "objective/train/value_min": -0.552734375, "objective/train/value_reward_corr": 0.42529685020534136, "objective/train/value_std": 0.018280029296875, "objective/train/weight_avg": 1.000611424446106, "objective/train/weighted_lm_loss": 2.783144950866699, "objective/train/weights_max": 1.0329355001449585, "objective/train/weights_min": 0.917177140712738, "theoretical_loss": 3.386279232211691, "tokens_seen": 2341535744 }, { "epoch": 0.42, "learning_rate": 0.0005910119624959587, "loss": 1.3449, "theoretical_loss": 3.3862715728271815, "tokens_seen": 2341601280 }, { "epoch": 0.42, "learning_rate": 0.0005906886517943744, "loss": 1.3123, "theoretical_loss": 3.3862103076275587, "tokens_seen": 2342125568 }, { "epoch": 0.42, "learning_rate": 0.0005903653410927901, "loss": 1.2966, "theoretical_loss": 3.386149059979723, "tokens_seen": 2342649856 }, { "epoch": 0.42, "objective/train/advantage_avg": -0.028160328045487404, "objective/train/docs_used": 1321245, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.2278332710266113, "objective/train/original_loss": 2.2278330326080322, "objective/train/theoretical_loss": 3.3860878298747203, "objective/train/tokens_used": 702033376, "objective/train/value_avg": -0.03533935546875, "objective/train/value_loss": 0.0249522365629673, "objective/train/value_max": -0.00034880638122558594, "objective/train/value_min": -0.93994140625, "objective/train/value_reward_corr": 0.8076643775918029, "objective/train/value_std": 0.07342529296875, "objective/train/weight_avg": 0.9973063468933105, "objective/train/weighted_lm_loss": 2.2243921756744385, "objective/train/weights_max": 1.0633556842803955, "objective/train/weights_min": 0.909092903137207, "theoretical_loss": 3.3860878298747203, "tokens_seen": 2343174144 }, { "epoch": 0.42, "learning_rate": 0.000590042030391206, "loss": 1.2788, "theoretical_loss": 3.3860878298747203, "tokens_seen": 2343174144 }, { "epoch": 0.42, "learning_rate": 0.0005897187196896217, "loss": 1.3198, "theoretical_loss": 3.386026617303602, "tokens_seen": 2343698432 }, { "epoch": 0.42, "learning_rate": 0.0005893954089880376, "loss": 1.3035, "theoretical_loss": 3.385965422257427, "tokens_seen": 2344222720 }, { "epoch": 0.42, "learning_rate": 0.0005890720982864532, "loss": 1.3051, "theoretical_loss": 3.3859042447272594, "tokens_seen": 2344747008 }, { "epoch": 0.42, "objective/train/advantage_avg": 0.00689357565715909, "objective/train/docs_used": 1322388, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.7013490200042725, "objective/train/original_loss": 2.7013492584228516, "objective/train/theoretical_loss": 3.385896598767138, "objective/train/tokens_used": 703671776, "objective/train/value_avg": -0.01438140869140625, "objective/train/value_loss": 0.000704017176758498, "objective/train/value_max": -0.0005211830139160156, "objective/train/value_min": -0.6337890625, "objective/train/value_reward_corr": 0.39532028109310874, "objective/train/value_std": 0.017242431640625, "objective/train/weight_avg": 1.0006928443908691, "objective/train/weighted_lm_loss": 2.703871965408325, "objective/train/weights_max": 1.0509085655212402, "objective/train/weights_min": 0.9674392938613892, "theoretical_loss": 3.385896598767138, "tokens_seen": 2344812544 }, { "epoch": 0.42, "learning_rate": 0.000588748787584869, "loss": 1.3176, "theoretical_loss": 3.3858430847041707, "tokens_seen": 2345271296 }, { "epoch": 0.42, "learning_rate": 0.0005884254768832849, "loss": 1.3296, "theoretical_loss": 3.3857819421792392, "tokens_seen": 2345795584 }, { "epoch": 0.42, "learning_rate": 0.0005881021661817006, "loss": 1.2977, "theoretical_loss": 3.3857208171435493, "tokens_seen": 2346319872 }, { "epoch": 0.42, "objective/train/advantage_avg": -0.009214132092893124, "objective/train/docs_used": 1323122, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.7646055221557617, "objective/train/original_loss": 2.7646052837371826, "objective/train/theoretical_loss": 3.3857055386162767, "objective/train/tokens_used": 705310176, "objective/train/value_avg": -0.033416748046875, "objective/train/value_loss": 0.016344456002116203, "objective/train/value_max": -0.00047659873962402344, "objective/train/value_min": -0.98681640625, "objective/train/value_reward_corr": 0.41342555457292723, "objective/train/value_std": 0.06878662109375, "objective/train/weight_avg": 0.9991588592529297, "objective/train/weighted_lm_loss": 2.7615950107574463, "objective/train/weights_max": 1.0844205617904663, "objective/train/weights_min": 0.9068005681037903, "theoretical_loss": 3.3857055386162767, "tokens_seen": 2346450944 }, { "epoch": 0.42, "learning_rate": 0.0005877788554801165, "loss": 1.3331, "theoretical_loss": 3.385659709588192, "tokens_seen": 2346844160 }, { "epoch": 0.42, "learning_rate": 0.0005874555447785321, "loss": 1.3252, "theoretical_loss": 3.385598619504265, "tokens_seen": 2347368448 }, { "epoch": 0.42, "learning_rate": 0.0005871322340769479, "loss": 1.3281, "theoretical_loss": 3.3855375468828726, "tokens_seen": 2347892736 }, { "epoch": 0.42, "objective/train/advantage_avg": 0.008945022709667683, "objective/train/docs_used": 1324638, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.825010061264038, "objective/train/original_loss": 2.825009822845459, "objective/train/theoretical_loss": 3.3855146491500956, "objective/train/tokens_used": 706948576, "objective/train/value_avg": -0.01230621337890625, "objective/train/value_loss": 0.00026577559765428305, "objective/train/value_max": -0.0004107952117919922, "objective/train/value_min": -0.149169921875, "objective/train/value_reward_corr": 0.21553942055277048, "objective/train/value_std": 0.0108489990234375, "objective/train/weight_avg": 1.0008958578109741, "objective/train/weighted_lm_loss": 2.827969551086426, "objective/train/weights_max": 1.0149215459823608, "objective/train/weights_min": 0.9882123470306396, "theoretical_loss": 3.3855146491500956, "tokens_seen": 2348089344 }, { "epoch": 0.42, "learning_rate": 0.0005868089233753638, "loss": 1.304, "theoretical_loss": 3.385476491715125, "tokens_seen": 2348417024 }, { "epoch": 0.42, "learning_rate": 0.0005864856126737795, "loss": 1.3231, "theoretical_loss": 3.3854154539921395, "tokens_seen": 2348941312 }, { "epoch": 0.42, "learning_rate": 0.0005861623019721953, "loss": 1.3086, "theoretical_loss": 3.38535443370504, "tokens_seen": 2349465600 }, { "epoch": 0.42, "objective/train/advantage_avg": 0.007703840266913176, "objective/train/docs_used": 1325195, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.66892671585083, "objective/train/original_loss": 2.66892671585083, "objective/train/theoretical_loss": 3.385323930097175, "objective/train/tokens_used": 708586976, "objective/train/value_avg": -0.01483154296875, "objective/train/value_loss": 0.0019039205508306623, "objective/train/value_max": -0.0006361007690429688, "objective/train/value_min": -0.93701171875, "objective/train/value_reward_corr": 0.6080408323948283, "objective/train/value_std": 0.032440185546875, "objective/train/weight_avg": 1.0007797479629517, "objective/train/weighted_lm_loss": 2.6713197231292725, "objective/train/weights_max": 1.0599017143249512, "objective/train/weights_min": 0.9119632840156555, "theoretical_loss": 3.385323930097175, "tokens_seen": 2349727744 }, { "epoch": 0.42, "learning_rate": 0.000585838991270611, "loss": 1.293, "theoretical_loss": 3.385293430844956, "tokens_seen": 2349989888 }, { "epoch": 0.42, "learning_rate": 0.0005855156805690268, "loss": 1.2839, "theoretical_loss": 3.385232445403024, "tokens_seen": 2350514176 }, { "epoch": 0.42, "learning_rate": 0.0005851923698674427, "loss": 1.3009, "theoretical_loss": 3.3851714773703883, "tokens_seen": 2351038464 }, { "epoch": 0.42, "objective/train/advantage_avg": 0.005248021800071001, "objective/train/docs_used": 1325921, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 1.8273226022720337, "objective/train/original_loss": 1.8273224830627441, "objective/train/theoretical_loss": 3.3851333811867157, "objective/train/tokens_used": 710225376, "objective/train/value_avg": -0.0103912353515625, "objective/train/value_loss": 0.0001538982178317383, "objective/train/value_max": -0.00031757354736328125, "objective/train/value_min": -0.11163330078125, "objective/train/value_reward_corr": 0.5300746265645743, "objective/train/value_std": 0.008148193359375, "objective/train/weight_avg": 1.0005255937576294, "objective/train/weighted_lm_loss": 1.8286486864089966, "objective/train/weights_max": 1.0092153549194336, "objective/train/weights_min": 0.9879461526870728, "theoretical_loss": 3.3851333811867157, "tokens_seen": 2351366144 }, { "epoch": 0.42, "learning_rate": 0.0005848690591658584, "loss": 1.2857, "theoretical_loss": 3.3851105267381976, "tokens_seen": 2351562752 }, { "epoch": 0.42, "learning_rate": 0.0005845457484642742, "loss": 1.3024, "theoretical_loss": 3.385049593497608, "tokens_seen": 2352087040 }, { "epoch": 0.42, "learning_rate": 0.0005842224377626899, "loss": 1.2903, "theoretical_loss": 3.384988677639782, "tokens_seen": 2352611328 }, { "epoch": 0.42, "objective/train/advantage_avg": 0.005655945278704166, "objective/train/docs_used": 1327039, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.9431047439575195, "objective/train/original_loss": 2.9431047439575195, "objective/train/theoretical_loss": 3.384943002148538, "objective/train/tokens_used": 711863776, "objective/train/value_avg": -0.014007568359375, "objective/train/value_loss": 0.0030923644080758095, "objective/train/value_max": -0.0006046295166015625, "objective/train/value_min": -0.26611328125, "objective/train/value_reward_corr": 0.1628018654264721, "objective/train/value_std": 0.01293182373046875, "objective/train/weight_avg": 1.0005807876586914, "objective/train/weighted_lm_loss": 2.9450912475585938, "objective/train/weights_max": 1.0266181230545044, "objective/train/weights_min": 0.9129643440246582, "theoretical_loss": 3.384943002148538, "tokens_seen": 2353004544 }, { "epoch": 0.42, "learning_rate": 0.0005838991270611057, "loss": 1.283, "theoretical_loss": 3.3849277791558885, "tokens_seen": 2353135616 }, { "epoch": 0.42, "learning_rate": 0.0005835758163595215, "loss": 1.3191, "theoretical_loss": 3.384866898037103, "tokens_seen": 2353659904 }, { "epoch": 0.42, "learning_rate": 0.0005832525056579373, "loss": 1.2897, "theoretical_loss": 3.384806034274608, "tokens_seen": 2354184192 }, { "epoch": 0.42, "objective/train/advantage_avg": 0.004429525230079889, "objective/train/docs_used": 1327759, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.5940024852752686, "objective/train/original_loss": 2.5940017700195312, "objective/train/theoretical_loss": 3.384752792713079, "objective/train/tokens_used": 713502176, "objective/train/value_avg": -0.015106201171875, "objective/train/value_loss": 0.0029087685979902744, "objective/train/value_max": -0.00015354156494140625, "objective/train/value_min": -0.939453125, "objective/train/value_reward_corr": 0.46697646389064523, "objective/train/value_std": 0.0225067138671875, "objective/train/weight_avg": 1.0004572868347168, "objective/train/weighted_lm_loss": 2.5955283641815186, "objective/train/weights_max": 1.0212417840957642, "objective/train/weights_min": 0.910754382610321, "theoretical_loss": 3.384752792713079, "tokens_seen": 2354642944 }, { "epoch": 0.42, "learning_rate": 0.0005829291949563531, "loss": 1.3188, "theoretical_loss": 3.3847451878595907, "tokens_seen": 2354708480 }, { "epoch": 0.42, "learning_rate": 0.0005826058842547688, "loss": 1.288, "theoretical_loss": 3.3846843587832467, "tokens_seen": 2355232768 }, { "epoch": 0.42, "learning_rate": 0.0005822825735531846, "loss": 1.3167, "theoretical_loss": 3.3846235470367763, "tokens_seen": 2355757056 }, { "epoch": 0.42, "objective/train/advantage_avg": 0.00044515778427012265, "objective/train/docs_used": 1329159, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.552196502685547, "objective/train/original_loss": 2.5521960258483887, "objective/train/theoretical_loss": 3.384562752611388, "objective/train/tokens_used": 715140576, "objective/train/value_avg": -0.0161590576171875, "objective/train/value_loss": 0.0037145500537008047, "objective/train/value_max": -0.0005955696105957031, "objective/train/value_min": -0.2293701171875, "objective/train/value_reward_corr": 0.37039425930745706, "objective/train/value_std": 0.0185089111328125, "objective/train/weight_avg": 1.0000628232955933, "objective/train/weighted_lm_loss": 2.551887273788452, "objective/train/weights_max": 1.0174705982208252, "objective/train/weights_min": 0.9171373248100281, "theoretical_loss": 3.384562752611388, "tokens_seen": 2356281344 }, { "epoch": 0.42, "learning_rate": 0.0005819592628516004, "loss": 1.3154, "theoretical_loss": 3.384562752611388, "tokens_seen": 2356281344 }, { "epoch": 0.42, "learning_rate": 0.0005816359521500162, "loss": 1.3226, "theoretical_loss": 3.3845019754982957, "tokens_seen": 2356805632 }, { "epoch": 0.42, "learning_rate": 0.000581312641448432, "loss": 1.2921, "theoretical_loss": 3.384441215688719, "tokens_seen": 2357329920 }, { "epoch": 0.42, "learning_rate": 0.0005809893307468476, "loss": 1.2734, "theoretical_loss": 3.384380473173886, "tokens_seen": 2357854208 }, { "epoch": 0.42, "objective/train/advantage_avg": -0.002220011316239834, "objective/train/docs_used": 1329871, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.5115983486175537, "objective/train/original_loss": 2.5115983486175537, "objective/train/theoretical_loss": 3.3843728815751315, "objective/train/tokens_used": 716778976, "objective/train/value_avg": -0.045257568359375, "objective/train/value_loss": 0.009910106658935547, "objective/train/value_max": -0.0007948875427246094, "objective/train/value_min": -0.9931640625, "objective/train/value_reward_corr": 0.8141154575701125, "objective/train/value_std": 0.13232421875, "objective/train/weight_avg": 0.9998269081115723, "objective/train/weighted_lm_loss": 2.5128679275512695, "objective/train/weights_max": 1.0876500606536865, "objective/train/weights_min": 0.9115759134292603, "theoretical_loss": 3.3843728815751315, "tokens_seen": 2357919744 }, { "epoch": 0.43, "learning_rate": 0.0005806660200452635, "loss": 1.2892, "theoretical_loss": 3.3843197479450287, "tokens_seen": 2358378496 }, { "epoch": 0.43, "learning_rate": 0.0005803427093436792, "loss": 1.2917, "theoretical_loss": 3.3842590399933874, "tokens_seen": 2358902784 }, { "epoch": 0.43, "learning_rate": 0.0005800193986420951, "loss": 1.3165, "theoretical_loss": 3.384198349310208, "tokens_seen": 2359427072 }, { "epoch": 0.43, "objective/train/advantage_avg": -0.0012856778921559453, "objective/train/docs_used": 1331053, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.0878055095672607, "objective/train/original_loss": 2.0878052711486816, "objective/train/theoretical_loss": 3.384183179336585, "objective/train/tokens_used": 718417376, "objective/train/value_avg": -0.0178985595703125, "objective/train/value_loss": 0.005018532741814852, "objective/train/value_max": -0.0004355907440185547, "objective/train/value_min": -0.18701171875, "objective/train/value_reward_corr": 0.47540025791740065, "objective/train/value_std": 0.0159149169921875, "objective/train/weight_avg": 0.999896228313446, "objective/train/weighted_lm_loss": 2.0885655879974365, "objective/train/weights_max": 1.0177122354507446, "objective/train/weights_min": 0.9332689642906189, "theoretical_loss": 3.384183179336585, "tokens_seen": 2359558144 }, { "epoch": 0.43, "learning_rate": 0.0005796960879405109, "loss": 1.2861, "theoretical_loss": 3.3841376758867425, "tokens_seen": 2359951360 }, { "epoch": 0.43, "learning_rate": 0.0005793727772389266, "loss": 1.2634, "theoretical_loss": 3.3840770197142502, "tokens_seen": 2360475648 }, { "epoch": 0.43, "learning_rate": 0.0005790494665373424, "loss": 1.2928, "theoretical_loss": 3.384016380783996, "tokens_seen": 2360999936 }, { "epoch": 0.43, "objective/train/advantage_avg": 0.003984550014138222, "objective/train/docs_used": 1331743, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.4252450466156006, "objective/train/original_loss": 2.4252448081970215, "objective/train/theoretical_loss": 3.383993645628633, "objective/train/tokens_used": 720055776, "objective/train/value_avg": -0.020263671875, "objective/train/value_loss": 0.004525930620729923, "objective/train/value_max": -0.0003275871276855469, "objective/train/value_min": -0.998046875, "objective/train/value_reward_corr": 0.6908518383802842, "objective/train/value_std": 0.056915283203125, "objective/train/weight_avg": 1.0004205703735352, "objective/train/weighted_lm_loss": 2.427233934402466, "objective/train/weights_max": 1.0496279001235962, "objective/train/weights_min": 0.908545732498169, "theoretical_loss": 3.383993645628633, "tokens_seen": 2361196544 }, { "epoch": 0.43, "learning_rate": 0.0005787261558357581, "loss": 1.3244, "theoretical_loss": 3.3839557590872507, "tokens_seen": 2361524224 }, { "epoch": 0.43, "learning_rate": 0.000578402845134174, "loss": 1.3389, "theoretical_loss": 3.3838951546152933, "tokens_seen": 2362048512 }, { "epoch": 0.43, "learning_rate": 0.0005780795344325898, "loss": 1.2669, "theoretical_loss": 3.3838345673594072, "tokens_seen": 2362572800 }, { "epoch": 0.43, "objective/train/advantage_avg": 0.005917126778513193, "objective/train/docs_used": 1333010, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.732544422149658, "objective/train/original_loss": 2.7325446605682373, "objective/train/theoretical_loss": 3.383804280184769, "objective/train/tokens_used": 721694176, "objective/train/value_avg": -0.015228271484375, "objective/train/value_loss": 0.0028342914301902056, "objective/train/value_max": -0.0008559226989746094, "objective/train/value_min": -0.9345703125, "objective/train/value_reward_corr": 0.3718399685474359, "objective/train/value_std": 0.0183563232421875, "objective/train/weight_avg": 1.000605583190918, "objective/train/weighted_lm_loss": 2.7339110374450684, "objective/train/weights_max": 1.0239273309707642, "objective/train/weights_min": 0.9069600105285645, "theoretical_loss": 3.383804280184769, "tokens_seen": 2362834944 }, { "epoch": 0.43, "learning_rate": 0.0005777562237310055, "loss": 1.2996, "theoretical_loss": 3.3837739973108834, "tokens_seen": 2363097088 }, { "epoch": 0.43, "learning_rate": 0.0005774329130294213, "loss": 1.2916, "theoretical_loss": 3.3837134444610184, "tokens_seen": 2363621376 }, { "epoch": 0.43, "learning_rate": 0.000577109602327837, "loss": 1.2996, "theoretical_loss": 3.383652908801116, "tokens_seen": 2364145664 }, { "epoch": 0.43, "objective/train/advantage_avg": -7.33563065296039e-05, "objective/train/docs_used": 1333580, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.511047601699829, "objective/train/original_loss": 2.511047601699829, "objective/train/theoretical_loss": 3.3836150827390927, "objective/train/tokens_used": 723332576, "objective/train/value_avg": -0.0174407958984375, "objective/train/value_loss": 0.005281064193695784, "objective/train/value_max": -0.0007352828979492188, "objective/train/value_min": -0.94775390625, "objective/train/value_reward_corr": 0.5896662620108444, "objective/train/value_std": 0.03143310546875, "objective/train/weight_avg": 1.0000187158584595, "objective/train/weighted_lm_loss": 2.511082410812378, "objective/train/weights_max": 1.0397011041641235, "objective/train/weights_min": 0.9299317002296448, "theoretical_loss": 3.3836150827390927, "tokens_seen": 2364473344 }, { "epoch": 0.43, "learning_rate": 0.0005767862916262528, "loss": 1.3393, "theoretical_loss": 3.3835923903224847, "tokens_seen": 2364669952 }, { "epoch": 0.43, "learning_rate": 0.0005764629809246687, "loss": 1.2789, "theoretical_loss": 3.383531889016441, "tokens_seen": 2365194240 }, { "epoch": 0.43, "learning_rate": 0.0005761396702230844, "loss": 1.304, "theoretical_loss": 3.383471404874307, "tokens_seen": 2365718528 }, { "epoch": 0.43, "objective/train/advantage_avg": 0.008228359743952751, "objective/train/docs_used": 1334672, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.6175613403320312, "objective/train/original_loss": 2.6175615787506104, "objective/train/theoretical_loss": 3.383426053026305, "objective/train/tokens_used": 724970976, "objective/train/value_avg": -0.021575927734375, "objective/train/value_loss": 0.0012587602250277996, "objective/train/value_max": -0.0005974769592285156, "objective/train/value_min": -0.31982421875, "objective/train/value_reward_corr": 0.600772509828427, "objective/train/value_std": 0.0321044921875, "objective/train/weight_avg": 1.0008291006088257, "objective/train/weighted_lm_loss": 2.6200215816497803, "objective/train/weights_max": 1.0265181064605713, "objective/train/weights_min": 0.9575564861297607, "theoretical_loss": 3.383426053026305, "tokens_seen": 2366111744 }, { "epoch": 0.43, "learning_rate": 0.0005758163595215002, "loss": 1.2954, "theoretical_loss": 3.3834109378874113, "tokens_seen": 2366242816 }, { "epoch": 0.43, "learning_rate": 0.0005754930488199159, "loss": 1.3236, "theoretical_loss": 3.383350488047088, "tokens_seen": 2366767104 }, { "epoch": 0.43, "learning_rate": 0.0005751697381183317, "loss": 1.3285, "theoretical_loss": 3.3832900553446787, "tokens_seen": 2367291392 }, { "epoch": 0.43, "objective/train/advantage_avg": 0.003980557434260845, "objective/train/docs_used": 1335172, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.6056301593780518, "objective/train/original_loss": 2.6056301593780518, "objective/train/theoretical_loss": 3.3832371907817134, "objective/train/tokens_used": 726609376, "objective/train/value_avg": -0.013641357421875, "objective/train/value_loss": 0.00181409006472677, "objective/train/value_max": -0.0004897117614746094, "objective/train/value_min": -0.568359375, "objective/train/value_reward_corr": 0.5040238230759935, "objective/train/value_std": 0.0176544189453125, "objective/train/weight_avg": 1.0004069805145264, "objective/train/weighted_lm_loss": 2.6074471473693848, "objective/train/weights_max": 1.0459353923797607, "objective/train/weights_min": 0.9267333149909973, "theoretical_loss": 3.3832371907817134, "tokens_seen": 2367750144 }, { "epoch": 0.43, "learning_rate": 0.0005748464274167476, "loss": 1.307, "theoretical_loss": 3.383229639771531, "tokens_seen": 2367815680 }, { "epoch": 0.43, "learning_rate": 0.0005745231167151633, "loss": 1.3562, "theoretical_loss": 3.383169241318998, "tokens_seen": 2368339968 }, { "epoch": 0.43, "learning_rate": 0.000574199806013579, "loss": 1.3101, "theoretical_loss": 3.3831088599784396, "tokens_seen": 2368864256 }, { "epoch": 0.43, "objective/train/advantage_avg": -0.0023168781772255898, "objective/train/docs_used": 1336475, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.475102663040161, "objective/train/original_loss": 2.475102424621582, "objective/train/theoretical_loss": 3.3830484957412224, "objective/train/tokens_used": 728247776, "objective/train/value_avg": -0.0152740478515625, "objective/train/value_loss": 0.004204327240586281, "objective/train/value_max": -0.0004355907440185547, "objective/train/value_min": -0.70751953125, "objective/train/value_reward_corr": 0.5347201931758323, "objective/train/value_std": 0.020111083984375, "objective/train/weight_avg": 0.9997890591621399, "objective/train/weighted_lm_loss": 2.4758594036102295, "objective/train/weights_max": 1.0153350830078125, "objective/train/weights_min": 0.9165407419204712, "theoretical_loss": 3.3830484957412224, "tokens_seen": 2369388544 }, { "epoch": 0.43, "learning_rate": 0.0005738764953119948, "loss": 1.2779, "theoretical_loss": 3.3830484957412224, "tokens_seen": 2369388544 }, { "epoch": 0.43, "learning_rate": 0.0005735531846104106, "loss": 1.293, "theoretical_loss": 3.3829881485987183, "tokens_seen": 2369912832 }, { "epoch": 0.43, "learning_rate": 0.0005732298739088265, "loss": 1.3359, "theoretical_loss": 3.382927818542307, "tokens_seen": 2370437120 }, { "epoch": 0.43, "learning_rate": 0.0005729065632072422, "loss": 1.3064, "theoretical_loss": 3.382867505563372, "tokens_seen": 2370961408 }, { "epoch": 0.43, "objective/train/advantage_avg": 0.0005026076687499881, "objective/train/docs_used": 1336773, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.454712390899658, "objective/train/original_loss": 2.4547128677368164, "objective/train/theoretical_loss": 3.3828599676413367, "objective/train/tokens_used": 729886176, "objective/train/value_avg": -0.0222625732421875, "objective/train/value_loss": 0.004424029495567083, "objective/train/value_max": -0.00015234947204589844, "objective/train/value_min": -0.97802734375, "objective/train/value_reward_corr": 0.6995306307639553, "objective/train/value_std": 0.054931640625, "objective/train/weight_avg": 1.0000720024108887, "objective/train/weighted_lm_loss": 2.455209493637085, "objective/train/weights_max": 1.0849403142929077, "objective/train/weights_min": 0.9076063632965088, "theoretical_loss": 3.3828599676413367, "tokens_seen": 2371026944 }, { "epoch": 0.43, "learning_rate": 0.0005725832525056579, "loss": 1.3641, "theoretical_loss": 3.3828072096533055, "tokens_seen": 2371485696 }, { "epoch": 0.43, "learning_rate": 0.0005722599418040737, "loss": 1.2794, "theoretical_loss": 3.382746930803505, "tokens_seen": 2372009984 }, { "epoch": 0.43, "learning_rate": 0.0005719366311024895, "loss": 1.3028, "theoretical_loss": 3.3826866690053734, "tokens_seen": 2372534272 }, { "epoch": 0.43, "objective/train/advantage_avg": 0.004203373100608587, "objective/train/docs_used": 1338329, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.2728381156921387, "objective/train/original_loss": 2.2728381156921387, "objective/train/theoretical_loss": 3.3826716062191573, "objective/train/tokens_used": 731524576, "objective/train/value_avg": -0.030731201171875, "objective/train/value_loss": 0.005534665193408728, "objective/train/value_max": -0.00047278404235839844, "objective/train/value_min": -0.9765625, "objective/train/value_reward_corr": 0.7459167146968051, "objective/train/value_std": 0.07647705078125, "objective/train/weight_avg": 1.0004476308822632, "objective/train/weighted_lm_loss": 2.2732739448547363, "objective/train/weights_max": 1.0774048566818237, "objective/train/weights_min": 0.9116792678833008, "theoretical_loss": 3.3826716062191573, "tokens_seen": 2372665344 }, { "epoch": 0.43, "learning_rate": 0.0005716133204009054, "loss": 1.3198, "theoretical_loss": 3.3826264242503212, "tokens_seen": 2373058560 }, { "epoch": 0.43, "learning_rate": 0.0005712900096993211, "loss": 1.3276, "theoretical_loss": 3.3825661965297646, "tokens_seen": 2373582848 }, { "epoch": 0.43, "learning_rate": 0.0005709666989977368, "loss": 1.2884, "theoretical_loss": 3.382505985835126, "tokens_seen": 2374107136 }, { "epoch": 0.43, "objective/train/advantage_avg": 0.004087573383003473, "objective/train/docs_used": 1338943, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.5486247539520264, "objective/train/original_loss": 2.548624277114868, "objective/train/theoretical_loss": 3.382483411212381, "objective/train/tokens_used": 733162976, "objective/train/value_avg": -0.01537322998046875, "objective/train/value_loss": 0.0019414565758779645, "objective/train/value_max": -0.0006537437438964844, "objective/train/value_min": -0.9755859375, "objective/train/value_reward_corr": 0.6127641409665661, "objective/train/value_std": 0.037628173828125, "objective/train/weight_avg": 1.0004183053970337, "objective/train/weighted_lm_loss": 2.5507051944732666, "objective/train/weights_max": 1.0763641595840454, "objective/train/weights_min": 0.9069527983665466, "theoretical_loss": 3.382483411212381, "tokens_seen": 2374303744 }, { "epoch": 0.44, "learning_rate": 0.0005706433882961526, "loss": 1.2905, "theoretical_loss": 3.3824457921578333, "tokens_seen": 2374631424 }, { "epoch": 0.44, "learning_rate": 0.0005703200775945684, "loss": 1.311, "theoretical_loss": 3.382385615489322, "tokens_seen": 2375155712 }, { "epoch": 0.44, "learning_rate": 0.0005699967668929841, "loss": 1.2632, "theoretical_loss": 3.3823254558210323, "tokens_seen": 2375680000 }, { "epoch": 0.44, "objective/train/advantage_avg": 0.00670763710513711, "objective/train/docs_used": 1339545, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.707014322280884, "objective/train/original_loss": 2.7070138454437256, "objective/train/theoretical_loss": 3.382295382359298, "objective/train/tokens_used": 734801376, "objective/train/value_avg": -0.0211181640625, "objective/train/value_loss": 0.001736419158987701, "objective/train/value_max": -0.0006165504455566406, "objective/train/value_min": -0.5517578125, "objective/train/value_reward_corr": 0.5059024584641727, "objective/train/value_std": 0.0288238525390625, "objective/train/weight_avg": 1.00067937374115, "objective/train/weighted_lm_loss": 2.7091643810272217, "objective/train/weights_max": 1.0352977514266968, "objective/train/weights_min": 0.9439584016799927, "theoretical_loss": 3.382295382359298, "tokens_seen": 2375942144 }, { "epoch": 0.44, "learning_rate": 0.0005696734561914, "loss": 1.2976, "theoretical_loss": 3.3822653131444125, "tokens_seen": 2376204288 }, { "epoch": 0.44, "learning_rate": 0.0005693501454898157, "loss": 1.3112, "theoretical_loss": 3.3822051874509147, "tokens_seen": 2376728576 }, { "epoch": 0.44, "learning_rate": 0.0005690268347882315, "loss": 1.3006, "theoretical_loss": 3.382145078731999, "tokens_seen": 2377252864 }, { "epoch": 0.44, "objective/train/advantage_avg": 0.0038621004205197096, "objective/train/docs_used": 1340666, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.170992136001587, "objective/train/original_loss": 2.170992136001587, "objective/train/theoretical_loss": 3.38210751939879, "objective/train/tokens_used": 736439776, "objective/train/value_avg": -0.01390838623046875, "objective/train/value_loss": 0.0009787552990019321, "objective/train/value_max": -0.00022876262664794922, "objective/train/value_min": -0.32861328125, "objective/train/value_reward_corr": 0.5500254420205962, "objective/train/value_std": 0.0199127197265625, "objective/train/weight_avg": 1.0003910064697266, "objective/train/weighted_lm_loss": 2.1720120906829834, "objective/train/weights_max": 1.0199843645095825, "objective/train/weights_min": 0.947335422039032, "theoretical_loss": 3.38210751939879, "tokens_seen": 2377580544 }, { "epoch": 0.44, "learning_rate": 0.0005687035240866473, "loss": 1.3072, "theoretical_loss": 3.382084986979132, "tokens_seen": 2377777152 }, { "epoch": 0.44, "learning_rate": 0.000568380213385063, "loss": 1.3328, "theoretical_loss": 3.382024912183784, "tokens_seen": 2378301440 }, { "epoch": 0.44, "learning_rate": 0.0005680569026834789, "loss": 1.3138, "theoretical_loss": 3.381964854337434, "tokens_seen": 2378825728 }, { "epoch": 0.44, "objective/train/advantage_avg": 0.004962458275258541, "objective/train/docs_used": 1341274, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.4905083179473877, "objective/train/original_loss": 2.4905080795288086, "objective/train/theoretical_loss": 3.381919822070328, "objective/train/tokens_used": 738078176, "objective/train/value_avg": -0.0170440673828125, "objective/train/value_loss": 0.002838315675035119, "objective/train/value_max": -0.0004222393035888672, "objective/train/value_min": -0.47216796875, "objective/train/value_reward_corr": 0.3884089367279129, "objective/train/value_std": 0.019683837890625, "objective/train/weight_avg": 1.0005102157592773, "objective/train/weighted_lm_loss": 2.492173910140991, "objective/train/weights_max": 1.041896104812622, "objective/train/weights_min": 0.9354239702224731, "theoretical_loss": 3.381919822070328, "tokens_seen": 2379218944 }, { "epoch": 0.44, "learning_rate": 0.0005677335919818946, "loss": 1.3191, "theoretical_loss": 3.381904813431566, "tokens_seen": 2379350016 }, { "epoch": 0.44, "learning_rate": 0.0005674102812803103, "loss": 1.3039, "theoretical_loss": 3.3818447894576704, "tokens_seen": 2379874304 }, { "epoch": 0.44, "learning_rate": 0.0005670869705787262, "loss": 1.299, "theoretical_loss": 3.3817847824072436, "tokens_seen": 2380398592 }, { "epoch": 0.44, "objective/train/advantage_avg": -0.006115451920777559, "objective/train/docs_used": 1342636, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.3089325428009033, "objective/train/original_loss": 2.308932304382324, "objective/train/theoretical_loss": 3.381732290113973, "objective/train/tokens_used": 739716576, "objective/train/value_avg": -0.0160369873046875, "objective/train/value_loss": 0.0030945546459406614, "objective/train/value_max": -0.000720977783203125, "objective/train/value_min": -0.487548828125, "objective/train/value_reward_corr": 0.6158699901329355, "objective/train/value_std": 0.0189208984375, "objective/train/weight_avg": 0.9994037747383118, "objective/train/weighted_lm_loss": 2.3088929653167725, "objective/train/weights_max": 1.0157358646392822, "objective/train/weights_min": 0.9234463572502136, "theoretical_loss": 3.381732290113973, "tokens_seen": 2380857344 }, { "epoch": 0.44, "learning_rate": 0.0005667636598771419, "loss": 1.3244, "theoretical_loss": 3.3817247922717883, "tokens_seen": 2380922880 }, { "epoch": 0.44, "learning_rate": 0.0005664403491755578, "loss": 1.3275, "theoretical_loss": 3.3816648190428134, "tokens_seen": 2381447168 }, { "epoch": 0.44, "learning_rate": 0.0005661170384739736, "loss": 1.3164, "theoretical_loss": 3.381604862711834, "tokens_seen": 2381971456 }, { "epoch": 0.44, "objective/train/advantage_avg": 0.0040020146407186985, "objective/train/docs_used": 1343061, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.8278238773345947, "objective/train/original_loss": 2.827824115753174, "objective/train/theoretical_loss": 3.3815449232703703, "objective/train/tokens_used": 741354976, "objective/train/value_avg": -0.01520538330078125, "objective/train/value_loss": 0.0018038938287645578, "objective/train/value_max": -0.000820159912109375, "objective/train/value_min": -0.54052734375, "objective/train/value_reward_corr": 0.4755144958251262, "objective/train/value_std": 0.024169921875, "objective/train/weight_avg": 1.0004091262817383, "objective/train/weighted_lm_loss": 2.8291139602661133, "objective/train/weights_max": 1.0449062585830688, "objective/train/weights_min": 0.9480892419815063, "theoretical_loss": 3.3815449232703703, "tokens_seen": 2382495744 }, { "epoch": 0.44, "learning_rate": 0.0005657937277723892, "loss": 1.3254, "theoretical_loss": 3.3815449232703703, "tokens_seen": 2382495744 }, { "epoch": 0.44, "learning_rate": 0.0005654704170708051, "loss": 1.3404, "theoretical_loss": 3.38148500070995, "tokens_seen": 2383020032 }, { "epoch": 0.44, "learning_rate": 0.0005651471063692208, "loss": 1.3305, "theoretical_loss": 3.3814250950221068, "tokens_seen": 2383544320 }, { "epoch": 0.44, "learning_rate": 0.0005648237956676366, "loss": 1.296, "theoretical_loss": 3.38136520619838, "tokens_seen": 2384068608 }, { "epoch": 0.44, "objective/train/advantage_avg": 0.005414496175944805, "objective/train/docs_used": 1344107, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.69108247756958, "objective/train/original_loss": 2.691082000732422, "objective/train/theoretical_loss": 3.381357721280751, "objective/train/tokens_used": 742993376, "objective/train/value_avg": -0.016082763671875, "objective/train/value_loss": 0.000634512398391962, "objective/train/value_max": -0.0004916191101074219, "objective/train/value_min": -0.385009765625, "objective/train/value_reward_corr": 0.5651154887144534, "objective/train/value_std": 0.0194854736328125, "objective/train/weight_avg": 1.000544548034668, "objective/train/weighted_lm_loss": 2.693204402923584, "objective/train/weights_max": 1.0209081172943115, "objective/train/weights_min": 0.9728760123252869, "theoretical_loss": 3.381357721280751, "tokens_seen": 2384134144 }, { "epoch": 0.44, "learning_rate": 0.0005645004849660525, "loss": 1.3549, "theoretical_loss": 3.3813053342303143, "tokens_seen": 2384592896 }, { "epoch": 0.44, "learning_rate": 0.0005641771742644681, "loss": 1.3592, "theoretical_loss": 3.381245479109462, "tokens_seen": 2385117184 }, { "epoch": 0.44, "learning_rate": 0.000563853863562884, "loss": 1.2973, "theoretical_loss": 3.3811856408273804, "tokens_seen": 2385641472 }, { "epoch": 0.44, "objective/train/advantage_avg": 0.009358475916087627, "objective/train/docs_used": 1344624, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.5527610778808594, "objective/train/original_loss": 2.5527610778808594, "objective/train/theoretical_loss": 3.3811706838869293, "objective/train/tokens_used": 744631776, "objective/train/value_avg": -0.026275634765625, "objective/train/value_loss": 0.004332544282078743, "objective/train/value_max": -0.0004425048828125, "objective/train/value_min": -0.77685546875, "objective/train/value_reward_corr": 0.438810883573745, "objective/train/value_std": 0.0362548828125, "objective/train/weight_avg": 1.0009572505950928, "objective/train/weighted_lm_loss": 2.5553691387176514, "objective/train/weights_max": 1.0411338806152344, "objective/train/weights_min": 0.913602888584137, "theoretical_loss": 3.3811706838869293, "tokens_seen": 2385772544 }, { "epoch": 0.44, "learning_rate": 0.0005635305528612997, "loss": 1.3472, "theoretical_loss": 3.3811258193756335, "tokens_seen": 2386165760 }, { "epoch": 0.44, "learning_rate": 0.0005632072421597155, "loss": 1.3596, "theoretical_loss": 3.3810660147457914, "tokens_seen": 2386690048 }, { "epoch": 0.44, "learning_rate": 0.0005628839314581314, "loss": 1.3607, "theoretical_loss": 3.381006226929429, "tokens_seen": 2387214336 }, { "epoch": 0.44, "objective/train/advantage_avg": -0.0036685490049421787, "objective/train/docs_used": 1344624, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.6685571670532227, "objective/train/original_loss": 2.6685569286346436, "objective/train/theoretical_loss": 3.380983810831301, "objective/train/tokens_used": 746270176, "objective/train/value_avg": -0.0205078125, "objective/train/value_loss": 0.005852099973708391, "objective/train/value_max": -0.0004029273986816406, "objective/train/value_min": -0.65625, "objective/train/value_reward_corr": 0.6690898205339997, "objective/train/value_std": 0.0347900390625, "objective/train/weight_avg": 0.999661922454834, "objective/train/weighted_lm_loss": 2.6668288707733154, "objective/train/weights_max": 1.037771463394165, "objective/train/weights_min": 0.9240645170211792, "theoretical_loss": 3.380983810831301, "tokens_seen": 2387410944 }, { "epoch": 0.44, "learning_rate": 0.000562560620756547, "loss": 1.3947, "theoretical_loss": 3.3809464559181297, "tokens_seen": 2387738624 }, { "epoch": 0.44, "learning_rate": 0.0005622373100549628, "loss": 1.4187, "theoretical_loss": 3.3808867017034805, "tokens_seen": 2388262912 }, { "epoch": 0.44, "learning_rate": 0.0005619139993533786, "loss": 1.3858, "theoretical_loss": 3.380826964277076, "tokens_seen": 2388787200 }, { "epoch": 0.44, "objective/train/advantage_avg": 0.0026297876611351967, "objective/train/docs_used": 1345276, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.614800453186035, "objective/train/original_loss": 2.614800453186035, "objective/train/theoretical_loss": 3.3807971018568397, "objective/train/tokens_used": 747908576, "objective/train/value_avg": -0.024200439453125, "objective/train/value_loss": 0.007862025871872902, "objective/train/value_max": -0.0003905296325683594, "objective/train/value_min": -0.994140625, "objective/train/value_reward_corr": 0.5134189240888241, "objective/train/value_std": 0.06561279296875, "objective/train/weight_avg": 1.0003015995025635, "objective/train/weighted_lm_loss": 2.61604642868042, "objective/train/weights_max": 1.0710153579711914, "objective/train/weights_min": 0.9090467691421509, "theoretical_loss": 3.3807971018568397, "tokens_seen": 2389049344 }, { "epoch": 0.44, "learning_rate": 0.0005615906886517944, "loss": 1.456, "theoretical_loss": 3.380767243630516, "tokens_seen": 2389311488 }, { "epoch": 0.44, "learning_rate": 0.0005612673779502103, "loss": 1.4017, "theoretical_loss": 3.3807075397554067, "tokens_seen": 2389835776 }, { "epoch": 0.44, "learning_rate": 0.0005609440672486259, "loss": 1.3862, "theoretical_loss": 3.3806478526433605, "tokens_seen": 2390360064 }, { "epoch": 0.44, "objective/train/advantage_avg": 0.010053473524749279, "objective/train/docs_used": 1345792, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.8066556453704834, "objective/train/original_loss": 2.8066558837890625, "objective/train/theoretical_loss": 3.3806105567071, "objective/train/tokens_used": 749546976, "objective/train/value_avg": -0.0158233642578125, "objective/train/value_loss": 0.0008417118806391954, "objective/train/value_max": -0.0005526542663574219, "objective/train/value_min": -0.5751953125, "objective/train/value_reward_corr": 0.31746357320317603, "objective/train/value_std": 0.0181884765625, "objective/train/weight_avg": 1.0010095834732056, "objective/train/weighted_lm_loss": 2.810087203979492, "objective/train/weights_max": 1.0388476848602295, "objective/train/weights_min": 0.9652325510978699, "theoretical_loss": 3.3806105567071, "tokens_seen": 2390687744 }, { "epoch": 0.45, "learning_rate": 0.0005606207565470417, "loss": 1.3922, "theoretical_loss": 3.3805881822859956, "tokens_seen": 2390884352 }, { "epoch": 0.45, "learning_rate": 0.0005602974458454575, "loss": 1.3903, "theoretical_loss": 3.3805285286749367, "tokens_seen": 2391408640 }, { "epoch": 0.45, "learning_rate": 0.0005599741351438733, "loss": 1.421, "theoretical_loss": 3.3804688918018133, "tokens_seen": 2391932928 }, { "epoch": 0.45, "objective/train/advantage_avg": 0.00919165275990963, "objective/train/docs_used": 1347158, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.567422866821289, "objective/train/original_loss": 2.567423105239868, "objective/train/theoretical_loss": 3.38042417512621, "objective/train/tokens_used": 751185376, "objective/train/value_avg": -0.022430419921875, "objective/train/value_loss": 0.0016049608821049333, "objective/train/value_max": -0.00046563148498535156, "objective/train/value_min": -0.97314453125, "objective/train/value_reward_corr": 0.7997018643711024, "objective/train/value_std": 0.057952880859375, "objective/train/weight_avg": 1.000927209854126, "objective/train/weighted_lm_loss": 2.5702905654907227, "objective/train/weights_max": 1.0433564186096191, "objective/train/weights_min": 0.9257402420043945, "theoretical_loss": 3.38042417512621, "tokens_seen": 2392326144 }, { "epoch": 0.45, "learning_rate": 0.0005596508244422891, "loss": 1.3547, "theoretical_loss": 3.3804092716582623, "tokens_seen": 2392457216 }, { "epoch": 0.45, "learning_rate": 0.0005593275137407048, "loss": 1.3945, "theoretical_loss": 3.3803496682359264, "tokens_seen": 2392981504 }, { "epoch": 0.45, "learning_rate": 0.0005590042030391206, "loss": 1.3638, "theoretical_loss": 3.380290081526453, "tokens_seen": 2393505792 }, { "epoch": 0.45, "objective/train/advantage_avg": 0.010656045749783516, "objective/train/docs_used": 1347750, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 3.2747859954833984, "objective/train/original_loss": 3.2747859954833984, "objective/train/theoretical_loss": 3.380237956858873, "objective/train/tokens_used": 752823776, "objective/train/value_avg": -0.01885986328125, "objective/train/value_loss": 0.0014832791639491916, "objective/train/value_max": -0.0007352828979492188, "objective/train/value_min": -0.72705078125, "objective/train/value_reward_corr": 0.42284244340012656, "objective/train/value_std": 0.0229339599609375, "objective/train/weight_avg": 1.0010730028152466, "objective/train/weighted_lm_loss": 3.278715133666992, "objective/train/weights_max": 1.0456935167312622, "objective/train/weights_min": 0.9090695977210999, "theoretical_loss": 3.380237956858873, "tokens_seen": 2393964544 }, { "epoch": 0.45, "learning_rate": 0.0005586808923375364, "loss": 1.3886, "theoretical_loss": 3.3802305115214972, "tokens_seen": 2394030080 }, { "epoch": 0.45, "learning_rate": 0.0005583575816359522, "loss": 1.4492, "theoretical_loss": 3.3801709582127186, "tokens_seen": 2394554368 }, { "epoch": 0.45, "learning_rate": 0.0005580342709343679, "loss": 1.3753, "theoretical_loss": 3.3801114215917845, "tokens_seen": 2395078656 }, { "epoch": 0.45, "objective/train/advantage_avg": 0.004182158503681421, "objective/train/docs_used": 1348886, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.586012601852417, "objective/train/original_loss": 2.586012363433838, "objective/train/theoretical_loss": 3.3800519016503667, "objective/train/tokens_used": 754462176, "objective/train/value_avg": -0.025909423828125, "objective/train/value_loss": 0.005471243057399988, "objective/train/value_max": -0.0006666183471679688, "objective/train/value_min": -0.953125, "objective/train/value_reward_corr": 0.7253541800028256, "objective/train/value_std": 0.061614990234375, "objective/train/weight_avg": 1.0004451274871826, "objective/train/weighted_lm_loss": 2.5879225730895996, "objective/train/weights_max": 1.0622907876968384, "objective/train/weights_min": 0.9071458578109741, "theoretical_loss": 3.3800519016503667, "tokens_seen": 2395602944 }, { "epoch": 0.45, "learning_rate": 0.0005577109602327837, "loss": 1.3728, "theoretical_loss": 3.3800519016503667, "tokens_seen": 2395602944 }, { "epoch": 0.45, "learning_rate": 0.0005573876495311995, "loss": 1.3981, "theoretical_loss": 3.379992398380143, "tokens_seen": 2396127232 }, { "epoch": 0.45, "learning_rate": 0.0005570643388296153, "loss": 1.3911, "theoretical_loss": 3.3799329117727988, "tokens_seen": 2396651520 }, { "epoch": 0.45, "learning_rate": 0.0005567410281280311, "loss": 1.3753, "theoretical_loss": 3.379873441820023, "tokens_seen": 2397175808 }, { "epoch": 0.45, "objective/train/advantage_avg": 0.011412554420530796, "objective/train/docs_used": 1349689, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.495745897293091, "objective/train/original_loss": 2.49574613571167, "objective/train/theoretical_loss": 3.379866009246537, "objective/train/tokens_used": 756100576, "objective/train/value_avg": -0.02001953125, "objective/train/value_loss": 0.002604921581223607, "objective/train/value_max": -0.00039196014404296875, "objective/train/value_min": -0.92724609375, "objective/train/value_reward_corr": 0.4696524846010115, "objective/train/value_std": 0.044281005859375, "objective/train/weight_avg": 1.0011540651321411, "objective/train/weighted_lm_loss": 2.4994781017303467, "objective/train/weights_max": 1.094521164894104, "objective/train/weights_min": 0.9102574586868286, "theoretical_loss": 3.379866009246537, "tokens_seen": 2397241344 }, { "epoch": 0.45, "learning_rate": 0.0005564177174264468, "loss": 1.3947, "theoretical_loss": 3.3798139885135132, "tokens_seen": 2397700096 }, { "epoch": 0.45, "learning_rate": 0.0005560944067248626, "loss": 1.3818, "theoretical_loss": 3.3797545518449708, "tokens_seen": 2398224384 }, { "epoch": 0.45, "learning_rate": 0.0005557710960232783, "loss": 1.3905, "theoretical_loss": 3.3796951318061033, "tokens_seen": 2398748672 }, { "epoch": 0.45, "objective/train/advantage_avg": 0.008929046802222729, "objective/train/docs_used": 1350650, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.602806806564331, "objective/train/original_loss": 2.60280704498291, "objective/train/theoretical_loss": 3.3796802793938023, "objective/train/tokens_used": 757738976, "objective/train/value_avg": -0.0269775390625, "objective/train/value_loss": 0.003488719230517745, "objective/train/value_max": -0.000583648681640625, "objective/train/value_min": -0.62109375, "objective/train/value_reward_corr": 0.4927440090671795, "objective/train/value_std": 0.04278564453125, "objective/train/weight_avg": 1.0009100437164307, "objective/train/weighted_lm_loss": 2.6045584678649902, "objective/train/weights_max": 1.0473138093948364, "objective/train/weights_min": 0.9159911870956421, "theoretical_loss": 3.3796802793938023, "tokens_seen": 2398879744 }, { "epoch": 0.45, "learning_rate": 0.0005554477853216941, "loss": 1.3625, "theoretical_loss": 3.379635728388626, "tokens_seen": 2399272960 }, { "epoch": 0.45, "learning_rate": 0.00055512447462011, "loss": 1.344, "theoretical_loss": 3.379576341584258, "tokens_seen": 2399797248 }, { "epoch": 0.45, "learning_rate": 0.0005548011639185257, "loss": 1.3595, "theoretical_loss": 3.3795169713847253, "tokens_seen": 2400321536 }, { "epoch": 0.45, "objective/train/advantage_avg": 0.006611206103116274, "objective/train/docs_used": 1352096, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.746408224105835, "objective/train/original_loss": 2.746407985687256, "objective/train/theoretical_loss": 3.379494711839147, "objective/train/tokens_used": 759377376, "objective/train/value_avg": -0.0178680419921875, "objective/train/value_loss": 0.0029236231930553913, "objective/train/value_max": -0.0004012584686279297, "objective/train/value_min": -0.9931640625, "objective/train/value_reward_corr": 0.469402115396473, "objective/train/value_std": 0.0301666259765625, "objective/train/weight_avg": 1.0006755590438843, "objective/train/weighted_lm_loss": 2.7483417987823486, "objective/train/weights_max": 1.0505719184875488, "objective/train/weights_min": 0.9095378518104553, "theoretical_loss": 3.379494711839147, "tokens_seen": 2400518144 }, { "epoch": 0.45, "learning_rate": 0.0005544778532169415, "loss": 1.3718, "theoretical_loss": 3.3794576177817595, "tokens_seen": 2400845824 }, { "epoch": 0.45, "learning_rate": 0.0005541545425153572, "loss": 1.3512, "theoretical_loss": 3.3793982807670995, "tokens_seen": 2401370112 }, { "epoch": 0.45, "learning_rate": 0.000553831231813773, "loss": 1.359, "theoretical_loss": 3.379338960332488, "tokens_seen": 2401894400 }, { "epoch": 0.45, "objective/train/advantage_avg": 0.007525626104325056, "objective/train/docs_used": 1352871, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.884075880050659, "objective/train/original_loss": 2.8840761184692383, "objective/train/theoretical_loss": 3.3793093063301214, "objective/train/tokens_used": 761015776, "objective/train/value_avg": -0.0179290771484375, "objective/train/value_loss": 0.0031915258150547743, "objective/train/value_max": -0.0007791519165039062, "objective/train/value_min": -0.439697265625, "objective/train/value_reward_corr": 0.2215267882749468, "objective/train/value_std": 0.025177001953125, "objective/train/weight_avg": 1.0007681846618652, "objective/train/weighted_lm_loss": 2.8864777088165283, "objective/train/weights_max": 1.0414079427719116, "objective/train/weights_min": 0.9237536787986755, "theoretical_loss": 3.3793093063301214, "tokens_seen": 2402156544 }, { "epoch": 0.45, "learning_rate": 0.0005535079211121889, "loss": 1.3675, "theoretical_loss": 3.3792796564696745, "tokens_seen": 2402418688 }, { "epoch": 0.45, "learning_rate": 0.0005531846104106046, "loss": 1.3529, "theoretical_loss": 3.3792203691704144, "tokens_seen": 2402942976 }, { "epoch": 0.45, "learning_rate": 0.0005528612997090203, "loss": 1.3157, "theoretical_loss": 3.3791610984264704, "tokens_seen": 2403467264 }, { "epoch": 0.45, "objective/train/advantage_avg": 0.001716520870104432, "objective/train/docs_used": 1354213, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 3.1317384243011475, "objective/train/original_loss": 3.1317386627197266, "objective/train/theoretical_loss": 3.3791240626148427, "objective/train/tokens_used": 762654176, "objective/train/value_avg": -0.015899658203125, "objective/train/value_loss": 0.003187233116477728, "objective/train/value_max": -0.0006985664367675781, "objective/train/value_min": -0.373046875, "objective/train/value_reward_corr": 0.33941604282566906, "objective/train/value_std": 0.0167388916015625, "objective/train/weight_avg": 1.0001873970031738, "objective/train/weighted_lm_loss": 3.1327450275421143, "objective/train/weights_max": 1.0251775979995728, "objective/train/weights_min": 0.9208493232727051, "theoretical_loss": 3.3791240626148427, "tokens_seen": 2403794944 }, { "epoch": 0.45, "learning_rate": 0.0005525379890074361, "loss": 1.3759, "theoretical_loss": 3.379101844229608, "tokens_seen": 2403991552 }, { "epoch": 0.45, "learning_rate": 0.0005522146783058519, "loss": 1.3352, "theoretical_loss": 3.379042606571602, "tokens_seen": 2404515840 }, { "epoch": 0.45, "learning_rate": 0.0005518913676042678, "loss": 1.2927, "theoretical_loss": 3.37898338544423, "tokens_seen": 2405040128 }, { "epoch": 0.45, "objective/train/advantage_avg": 0.00036147391074337065, "objective/train/docs_used": 1354954, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.894627094268799, "objective/train/original_loss": 2.894627809524536, "objective/train/theoretical_loss": 3.378938980441988, "objective/train/tokens_used": 764292576, "objective/train/value_avg": -0.0149078369140625, "objective/train/value_loss": 0.004608189687132835, "objective/train/value_max": -0.00046372413635253906, "objective/train/value_min": -0.59619140625, "objective/train/value_reward_corr": 0.4045905210413578, "objective/train/value_std": 0.0200958251953125, "objective/train/weight_avg": 1.0000587701797485, "objective/train/weighted_lm_loss": 2.895636796951294, "objective/train/weights_max": 1.0390043258666992, "objective/train/weights_min": 0.9154118895530701, "theoretical_loss": 3.378938980441988, "tokens_seen": 2405433344 }, { "epoch": 0.45, "learning_rate": 0.0005515680569026835, "loss": 1.3175, "theoretical_loss": 3.3789241808392774, "tokens_seen": 2405564416 }, { "epoch": 0.45, "learning_rate": 0.0005512447462010993, "loss": 1.3405, "theoretical_loss": 3.378864992748536, "tokens_seen": 2406088704 }, { "epoch": 0.45, "learning_rate": 0.000550921435499515, "loss": 1.332, "theoretical_loss": 3.378805821163801, "tokens_seen": 2406612992 }, { "epoch": 0.45, "objective/train/advantage_avg": -0.007409974001348019, "objective/train/docs_used": 1356171, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.865330696105957, "objective/train/original_loss": 2.865330219268799, "objective/train/theoretical_loss": 3.3787540595607974, "objective/train/tokens_used": 765930976, "objective/train/value_avg": -0.064697265625, "objective/train/value_loss": 0.006893636658787727, "objective/train/value_max": -0.0005636215209960938, "objective/train/value_min": -0.96142578125, "objective/train/value_reward_corr": 0.9388143787202891, "objective/train/value_std": 0.19677734375, "objective/train/weight_avg": 0.9992928504943848, "objective/train/weighted_lm_loss": 2.8640024662017822, "objective/train/weights_max": 1.0514659881591797, "objective/train/weights_min": 0.9135881066322327, "theoretical_loss": 3.3787540595607974, "tokens_seen": 2407071744 }, { "epoch": 0.46, "learning_rate": 0.0005505981247979308, "loss": 1.3361, "theoretical_loss": 3.378746666076876, "tokens_seen": 2407137280 }, { "epoch": 0.46, "learning_rate": 0.0005502748140963467, "loss": 1.3565, "theoretical_loss": 3.3786875274795682, "tokens_seen": 2407661568 }, { "epoch": 0.46, "learning_rate": 0.0005499515033947624, "loss": 1.304, "theoretical_loss": 3.3786284053636932, "tokens_seen": 2408185856 }, { "epoch": 0.46, "objective/train/advantage_avg": 0.010301068425178528, "objective/train/docs_used": 1356770, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.8660778999328613, "objective/train/original_loss": 2.8660776615142822, "objective/train/theoretical_loss": 3.3785692997210703, "objective/train/tokens_used": 767569376, "objective/train/value_avg": -0.032501220703125, "objective/train/value_loss": 0.008578515611588955, "objective/train/value_max": -0.0009965896606445312, "objective/train/value_min": -0.97802734375, "objective/train/value_reward_corr": 0.5178412421311666, "objective/train/value_std": 0.06939697265625, "objective/train/weight_avg": 1.0010725259780884, "objective/train/weighted_lm_loss": 2.868309736251831, "objective/train/weights_max": 1.0765299797058105, "objective/train/weights_min": 0.9115657806396484, "theoretical_loss": 3.3785692997210703, "tokens_seen": 2408710144 }, { "epoch": 0.46, "learning_rate": 0.0005496281926931782, "loss": 1.3431, "theoretical_loss": 3.3785692997210703, "tokens_seen": 2408710144 }, { "epoch": 0.46, "learning_rate": 0.0005493048819915939, "loss": 1.3282, "theoretical_loss": 3.3785102105435256, "tokens_seen": 2409234432 }, { "epoch": 0.46, "learning_rate": 0.0005489815712900097, "loss": 1.3479, "theoretical_loss": 3.378451137822891, "tokens_seen": 2409758720 }, { "epoch": 0.46, "learning_rate": 0.0005486582605884254, "loss": 1.353, "theoretical_loss": 3.3783920815510036, "tokens_seen": 2410283008 }, { "epoch": 0.46, "objective/train/advantage_avg": 0.0059792157262563705, "objective/train/docs_used": 1358132, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.8860461711883545, "objective/train/original_loss": 2.8860464096069336, "objective/train/theoretical_loss": 3.378384700673164, "objective/train/tokens_used": 769207776, "objective/train/value_avg": -0.01432037353515625, "objective/train/value_loss": 0.0012749290326610208, "objective/train/value_max": -0.0005931854248046875, "objective/train/value_min": -0.88623046875, "objective/train/value_reward_corr": 0.4871722469263657, "objective/train/value_std": 0.0202178955078125, "objective/train/weight_avg": 1.000604271888733, "objective/train/weighted_lm_loss": 2.8877034187316895, "objective/train/weights_max": 1.0332152843475342, "objective/train/weights_min": 0.9428519606590271, "theoretical_loss": 3.378384700673164, "tokens_seen": 2410348544 }, { "epoch": 0.46, "learning_rate": 0.0005483349498868413, "loss": 1.3277, "theoretical_loss": 3.3783330417197073, "tokens_seen": 2410807296 }, { "epoch": 0.46, "learning_rate": 0.0005480116391852571, "loss": 1.3592, "theoretical_loss": 3.378274018320851, "tokens_seen": 2411331584 }, { "epoch": 0.46, "learning_rate": 0.0005476883284836728, "loss": 1.3345, "theoretical_loss": 3.3782150113462897, "tokens_seen": 2411855872 }, { "epoch": 0.46, "objective/train/advantage_avg": 0.0008641680469736457, "objective/train/docs_used": 1358876, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.814143419265747, "objective/train/original_loss": 2.814143657684326, "objective/train/theoretical_loss": 3.378200262167992, "objective/train/tokens_used": 770846176, "objective/train/value_avg": -0.0244293212890625, "objective/train/value_loss": 0.006682766135782003, "objective/train/value_max": -0.00051116943359375, "objective/train/value_min": -0.9951171875, "objective/train/value_reward_corr": 0.5135026575163789, "objective/train/value_std": 0.054473876953125, "objective/train/weight_avg": 1.0001193284988403, "objective/train/weighted_lm_loss": 2.8149795532226562, "objective/train/weights_max": 1.0800772905349731, "objective/train/weights_min": 0.9084425568580627, "theoretical_loss": 3.378200262167992, "tokens_seen": 2411986944 }, { "epoch": 0.46, "learning_rate": 0.0005473650177820886, "loss": 1.3219, "theoretical_loss": 3.378156020787885, "tokens_seen": 2412380160 }, { "epoch": 0.46, "learning_rate": 0.0005470417070805043, "loss": 1.3451, "theoretical_loss": 3.378097046637502, "tokens_seen": 2412904448 }, { "epoch": 0.46, "learning_rate": 0.0005467183963789202, "loss": 1.3162, "theoretical_loss": 3.3780380888870147, "tokens_seen": 2413428736 }, { "epoch": 0.46, "objective/train/advantage_avg": 0.0035242268349975348, "objective/train/docs_used": 1360381, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.954057216644287, "objective/train/original_loss": 2.954056978225708, "objective/train/theoretical_loss": 3.378015983957022, "objective/train/tokens_used": 772484576, "objective/train/value_avg": -0.0184783935546875, "objective/train/value_loss": 0.00317182345315814, "objective/train/value_max": -0.0004458427429199219, "objective/train/value_min": -0.5263671875, "objective/train/value_reward_corr": 0.35762681894538434, "objective/train/value_std": 0.0261688232421875, "objective/train/weight_avg": 1.0003681182861328, "objective/train/weighted_lm_loss": 2.9559948444366455, "objective/train/weights_max": 1.0358482599258423, "objective/train/weights_min": 0.9243550300598145, "theoretical_loss": 3.378015983957022, "tokens_seen": 2413625344 }, { "epoch": 0.46, "learning_rate": 0.000546395085677336, "loss": 1.3184, "theoretical_loss": 3.3779791475283005, "tokens_seen": 2413953024 }, { "epoch": 0.46, "learning_rate": 0.0005460717749757516, "loss": 1.357, "theoretical_loss": 3.377920222553244, "tokens_seen": 2414477312 }, { "epoch": 0.46, "learning_rate": 0.0005457484642741675, "loss": 1.3162, "theoretical_loss": 3.377861313953734, "tokens_seen": 2415001600 }, { "epoch": 0.46, "objective/train/advantage_avg": 0.0090870326384902, "objective/train/docs_used": 1361019, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.618438482284546, "objective/train/original_loss": 2.618438482284546, "objective/train/theoretical_loss": 3.3778318657922766, "objective/train/tokens_used": 774122976, "objective/train/value_avg": -0.019439697265625, "objective/train/value_loss": 0.002271833596751094, "objective/train/value_max": -0.0004513263702392578, "objective/train/value_min": -0.6728515625, "objective/train/value_reward_corr": 0.5366268667571131, "objective/train/value_std": 0.035858154296875, "objective/train/weight_avg": 1.000920057296753, "objective/train/weighted_lm_loss": 2.6209218502044678, "objective/train/weights_max": 1.0457051992416382, "objective/train/weights_min": 0.9425267577171326, "theoretical_loss": 3.3778318657922766, "tokens_seen": 2415263744 }, { "epoch": 0.46, "learning_rate": 0.0005454251535725832, "loss": 1.3614, "theoretical_loss": 3.3778024217216664, "tokens_seen": 2415525888 }, { "epoch": 0.46, "learning_rate": 0.0005451018428709991, "loss": 1.3595, "theoretical_loss": 3.3777435458489435, "tokens_seen": 2416050176 }, { "epoch": 0.46, "learning_rate": 0.0005447785321694149, "loss": 1.313, "theoretical_loss": 3.3776846863274717, "tokens_seen": 2416574464 }, { "epoch": 0.46, "objective/train/advantage_avg": 0.008055069483816624, "objective/train/docs_used": 1362090, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.8322510719299316, "objective/train/original_loss": 2.8322510719299316, "objective/train/theoretical_loss": 3.3776479074263275, "objective/train/tokens_used": 775761376, "objective/train/value_avg": -0.01483154296875, "objective/train/value_loss": 0.0011580399004742503, "objective/train/value_max": -0.0006566047668457031, "objective/train/value_min": -0.51708984375, "objective/train/value_reward_corr": 0.32313923662702665, "objective/train/value_std": 0.0205535888671875, "objective/train/weight_avg": 1.000811219215393, "objective/train/weighted_lm_loss": 2.834930896759033, "objective/train/weights_max": 1.0380048751831055, "objective/train/weights_min": 0.9360631704330444, "theoretical_loss": 3.3776479074263275, "tokens_seen": 2416902144 }, { "epoch": 0.46, "learning_rate": 0.0005444552214678305, "loss": 1.3296, "theoretical_loss": 3.3776258431491635, "tokens_seen": 2417098752 }, { "epoch": 0.46, "learning_rate": 0.0005441319107662464, "loss": 1.2921, "theoretical_loss": 3.377567016305938, "tokens_seen": 2417623040 }, { "epoch": 0.46, "learning_rate": 0.0005438086000646621, "loss": 1.3289, "theoretical_loss": 3.377508205789719, "tokens_seen": 2418147328 }, { "epoch": 0.46, "objective/train/advantage_avg": 0.005849520210176706, "objective/train/docs_used": 1362527, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.4597060680389404, "objective/train/original_loss": 2.459705352783203, "objective/train/theoretical_loss": 3.3774641086122985, "objective/train/tokens_used": 777399776, "objective/train/value_avg": -0.0243072509765625, "objective/train/value_loss": 0.004470037762075663, "objective/train/value_max": -0.0006313323974609375, "objective/train/value_min": -0.5576171875, "objective/train/value_reward_corr": 0.3055979256828526, "objective/train/value_std": 0.031768798828125, "objective/train/weight_avg": 1.0006070137023926, "objective/train/weighted_lm_loss": 2.460405111312866, "objective/train/weights_max": 1.0306984186172485, "objective/train/weights_min": 0.9209855794906616, "theoretical_loss": 3.3774641086122985, "tokens_seen": 2418540544 }, { "epoch": 0.46, "learning_rate": 0.0005434852893630779, "loss": 1.283, "theoretical_loss": 3.3774494115924374, "tokens_seen": 2418671616 }, { "epoch": 0.46, "learning_rate": 0.0005431619786614938, "loss": 1.3649, "theoretical_loss": 3.3773906337060287, "tokens_seen": 2419195904 }, { "epoch": 0.46, "learning_rate": 0.0005428386679599094, "loss": 1.2956, "theoretical_loss": 3.3773318721224337, "tokens_seen": 2419720192 }, { "epoch": 0.46, "objective/train/advantage_avg": 0.005640473682433367, "objective/train/docs_used": 1363833, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.7929024696350098, "objective/train/original_loss": 2.7929022312164307, "objective/train/theoretical_loss": 3.3772804691038605, "objective/train/tokens_used": 779038176, "objective/train/value_avg": -0.0139617919921875, "objective/train/value_loss": 0.0010541247902438045, "objective/train/value_max": -0.0006880760192871094, "objective/train/value_min": -0.18994140625, "objective/train/value_reward_corr": 0.3775547124484581, "objective/train/value_std": 0.01230621337890625, "objective/train/weight_avg": 1.000569224357605, "objective/train/weighted_lm_loss": 2.7950239181518555, "objective/train/weights_max": 1.0118427276611328, "objective/train/weights_min": 0.9317920804023743, "theoretical_loss": 3.3772804691038605, "tokens_seen": 2420178944 }, { "epoch": 0.46, "learning_rate": 0.0005425153572583253, "loss": 1.3673, "theoretical_loss": 3.377273126833601, "tokens_seen": 2420244480 }, { "epoch": 0.46, "learning_rate": 0.000542192046556741, "loss": 1.3169, "theoretical_loss": 3.377214397831483, "tokens_seen": 2420768768 }, { "epoch": 0.46, "learning_rate": 0.0005418687358551568, "loss": 1.321, "theoretical_loss": 3.377155685108038, "tokens_seen": 2421293056 }, { "epoch": 0.46, "objective/train/advantage_avg": 0.0026700992602854967, "objective/train/docs_used": 1364650, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 3.1198837757110596, "objective/train/original_loss": 3.1198832988739014, "objective/train/theoretical_loss": 3.3770969886552313, "objective/train/tokens_used": 780676576, "objective/train/value_avg": -0.0279693603515625, "objective/train/value_loss": 0.009714287705719471, "objective/train/value_max": -0.0007042884826660156, "objective/train/value_min": -0.99853515625, "objective/train/value_reward_corr": 0.45777873287403387, "objective/train/value_std": 0.054473876953125, "objective/train/weight_avg": 1.0003148317337036, "objective/train/weighted_lm_loss": 3.1212334632873535, "objective/train/weights_max": 1.0841623544692993, "objective/train/weights_min": 0.9101977944374084, "theoretical_loss": 3.3770969886552313, "tokens_seen": 2421817344 }, { "epoch": 0.46, "learning_rate": 0.0005415454251535727, "loss": 1.3227, "theoretical_loss": 3.3770969886552313, "tokens_seen": 2421817344 }, { "epoch": 0.46, "learning_rate": 0.0005412221144519883, "loss": 1.3353, "theoretical_loss": 3.377038308465033, "tokens_seen": 2422341632 }, { "epoch": 0.46, "learning_rate": 0.0005408988037504042, "loss": 1.3138, "theoretical_loss": 3.376979644529418, "tokens_seen": 2422865920 }, { "epoch": 0.46, "learning_rate": 0.0005405754930488199, "loss": 1.3481, "theoretical_loss": 3.3769209968403677, "tokens_seen": 2423390208 }, { "epoch": 0.46, "objective/train/advantage_avg": 0.0026420950889587402, "objective/train/docs_used": 1365365, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.763035535812378, "objective/train/original_loss": 2.763035297393799, "objective/train/theoretical_loss": 3.3769136670211743, "objective/train/tokens_used": 782314976, "objective/train/value_avg": -0.017608642578125, "objective/train/value_loss": 0.004482826683670282, "objective/train/value_max": -0.000293731689453125, "objective/train/value_min": -0.91015625, "objective/train/value_reward_corr": 0.55700685482136, "objective/train/value_std": 0.041259765625, "objective/train/weight_avg": 1.0002862215042114, "objective/train/weighted_lm_loss": 2.763723850250244, "objective/train/weights_max": 1.045803427696228, "objective/train/weights_min": 0.9086901545524597, "theoretical_loss": 3.3769136670211743, "tokens_seen": 2423455744 }, { "epoch": 0.47, "learning_rate": 0.0005402521823472357, "loss": 1.2952, "theoretical_loss": 3.376862365389871, "tokens_seen": 2423914496 }, { "epoch": 0.47, "learning_rate": 0.0005399288716456516, "loss": 1.3169, "theoretical_loss": 3.3768037501699193, "tokens_seen": 2424438784 }, { "epoch": 0.47, "learning_rate": 0.0005396055609440672, "loss": 1.2976, "theoretical_loss": 3.376745151172512, "tokens_seen": 2424963072 }, { "epoch": 0.47, "objective/train/advantage_avg": 0.00806615874171257, "objective/train/docs_used": 1366807, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.764299154281616, "objective/train/original_loss": 2.764298439025879, "objective/train/theoretical_loss": 3.3767305039569955, "objective/train/tokens_used": 783953376, "objective/train/value_avg": -0.017120361328125, "objective/train/value_loss": 0.0016420638421550393, "objective/train/value_max": -0.0005035400390625, "objective/train/value_min": -0.7021484375, "objective/train/value_reward_corr": 0.3573555159005911, "objective/train/value_std": 0.0221405029296875, "objective/train/weight_avg": 1.0008147954940796, "objective/train/weighted_lm_loss": 2.76682448387146, "objective/train/weights_max": 1.0675866603851318, "objective/train/weights_min": 0.9374104142189026, "theoretical_loss": 3.3767305039569955, "tokens_seen": 2425094144 }, { "epoch": 0.47, "learning_rate": 0.000539282250242483, "loss": 1.3352, "theoretical_loss": 3.376686568389653, "tokens_seen": 2425487360 }, { "epoch": 0.47, "learning_rate": 0.0005389589395408988, "loss": 1.3117, "theoretical_loss": 3.3766280018133523, "tokens_seen": 2426011648 }, { "epoch": 0.47, "learning_rate": 0.0005386356288393146, "loss": 1.2684, "theoretical_loss": 3.3765694514356253, "tokens_seen": 2426535936 }, { "epoch": 0.47, "objective/train/advantage_avg": -0.0024832116905599833, "objective/train/docs_used": 1367391, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 3.119192600250244, "objective/train/original_loss": 3.1191930770874023, "objective/train/theoretical_loss": 3.376547499218544, "objective/train/tokens_used": 785591776, "objective/train/value_avg": -0.0191192626953125, "objective/train/value_loss": 0.005725373048335314, "objective/train/value_max": -0.0006666183471679688, "objective/train/value_min": -0.81396484375, "objective/train/value_reward_corr": 0.36197564040283065, "objective/train/value_std": 0.031219482421875, "objective/train/weight_avg": 0.9997797608375549, "objective/train/weighted_lm_loss": 3.1203505992889404, "objective/train/weights_max": 1.054972767829895, "objective/train/weights_min": 0.9097143411636353, "theoretical_loss": 3.376547499218544, "tokens_seen": 2426732544 }, { "epoch": 0.47, "learning_rate": 0.0005383123181377305, "loss": 1.3613, "theoretical_loss": 3.3765109172484937, "tokens_seen": 2427060224 }, { "epoch": 0.47, "learning_rate": 0.0005379890074361461, "loss": 1.2649, "theoretical_loss": 3.376452399243984, "tokens_seen": 2427584512 }, { "epoch": 0.47, "learning_rate": 0.0005376656967345619, "loss": 1.2855, "theoretical_loss": 3.376393897414129, "tokens_seen": 2428108800 }, { "epoch": 0.47, "objective/train/advantage_avg": 0.010923718102276325, "objective/train/docs_used": 1368496, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.654297351837158, "objective/train/original_loss": 2.654297351837158, "objective/train/theoretical_loss": 3.3763646525622084, "objective/train/tokens_used": 787230176, "objective/train/value_avg": -0.0223541259765625, "objective/train/value_loss": 0.0009255923214368522, "objective/train/value_max": -0.00034737586975097656, "objective/train/value_min": -0.3330078125, "objective/train/value_reward_corr": 0.6850329977687396, "objective/train/value_std": 0.037841796875, "objective/train/weight_avg": 1.0010969638824463, "objective/train/weighted_lm_loss": 2.657768726348877, "objective/train/weights_max": 1.02498197555542, "objective/train/weights_min": 0.9495272636413574, "theoretical_loss": 3.3763646525622084, "tokens_seen": 2428370944 }, { "epoch": 0.47, "learning_rate": 0.0005373423860329777, "loss": 1.3006, "theoretical_loss": 3.3763354117509667, "tokens_seen": 2428633088 }, { "epoch": 0.47, "learning_rate": 0.0005370190753313935, "loss": 1.2757, "theoretical_loss": 3.376276942246541, "tokens_seen": 2429157376 }, { "epoch": 0.47, "learning_rate": 0.0005366957646298092, "loss": 1.3172, "theoretical_loss": 3.3762184888929014, "tokens_seen": 2429681664 }, { "epoch": 0.47, "objective/train/advantage_avg": 0.004839686676859856, "objective/train/docs_used": 1368772, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.414233446121216, "objective/train/original_loss": 2.4142329692840576, "objective/train/theoretical_loss": 3.3761819637449175, "objective/train/tokens_used": 788868576, "objective/train/value_avg": -0.01476287841796875, "objective/train/value_loss": 0.0007763319299556315, "objective/train/value_max": -0.0002868175506591797, "objective/train/value_min": -0.68701171875, "objective/train/value_reward_corr": 0.22849675936248304, "objective/train/value_std": 0.017608642578125, "objective/train/weight_avg": 1.0004878044128418, "objective/train/weighted_lm_loss": 2.4168732166290283, "objective/train/weights_max": 1.0685451030731201, "objective/train/weights_min": 0.968094527721405, "theoretical_loss": 3.3761819637449175, "tokens_seen": 2430009344 }, { "epoch": 0.47, "learning_rate": 0.0005363724539282251, "loss": 1.3072, "theoretical_loss": 3.376160051682103, "tokens_seen": 2430205952 }, { "epoch": 0.47, "learning_rate": 0.0005360491432266408, "loss": 1.2803, "theoretical_loss": 3.3761016306062066, "tokens_seen": 2430730240 }, { "epoch": 0.47, "learning_rate": 0.0005357258325250566, "loss": 1.3153, "theoretical_loss": 3.3760432256572783, "tokens_seen": 2431254528 }, { "epoch": 0.47, "objective/train/advantage_avg": 0.005298976320773363, "objective/train/docs_used": 1370076, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.9584295749664307, "objective/train/original_loss": 2.9584293365478516, "objective/train/theoretical_loss": 3.375999432524136, "objective/train/tokens_used": 790506976, "objective/train/value_avg": -0.042572021484375, "objective/train/value_loss": 0.009439471177756786, "objective/train/value_max": -0.0005397796630859375, "objective/train/value_min": -0.99755859375, "objective/train/value_reward_corr": 0.6685365418159221, "objective/train/value_std": 0.087646484375, "objective/train/weight_avg": 1.000576376914978, "objective/train/weighted_lm_loss": 2.959840774536133, "objective/train/weights_max": 1.077638864517212, "objective/train/weights_min": 0.9076888561248779, "theoretical_loss": 3.375999432524136, "tokens_seen": 2431647744 }, { "epoch": 0.47, "learning_rate": 0.0005354025218234724, "loss": 1.3277, "theoretical_loss": 3.3759848368273904, "tokens_seen": 2431778816 }, { "epoch": 0.47, "learning_rate": 0.0005350792111218881, "loss": 1.3227, "theoretical_loss": 3.3759264641086197, "tokens_seen": 2432303104 }, { "epoch": 0.47, "learning_rate": 0.000534755900420304, "loss": 1.2957, "theoretical_loss": 3.3758681074930506, "tokens_seen": 2432827392 }, { "epoch": 0.47, "objective/train/advantage_avg": 0.008088125847280025, "objective/train/docs_used": 1370835, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.8184518814086914, "objective/train/original_loss": 2.8184516429901123, "objective/train/theoretical_loss": 3.3758170586578644, "objective/train/tokens_used": 792145376, "objective/train/value_avg": -0.01885986328125, "objective/train/value_loss": 0.002978898584842682, "objective/train/value_max": -0.0006046295166015625, "objective/train/value_min": -0.91455078125, "objective/train/value_reward_corr": 0.6215454410469987, "objective/train/value_std": 0.042633056640625, "objective/train/weight_avg": 1.0008234977722168, "objective/train/weighted_lm_loss": 2.8208932876586914, "objective/train/weights_max": 1.0614633560180664, "objective/train/weights_min": 0.9120888710021973, "theoretical_loss": 3.3758170586578644, "tokens_seen": 2433286144 }, { "epoch": 0.47, "learning_rate": 0.0005344325897187197, "loss": 1.3375, "theoretical_loss": 3.37580976697277, "tokens_seen": 2433351680 }, { "epoch": 0.47, "learning_rate": 0.0005341092790171354, "loss": 1.296, "theoretical_loss": 3.3757514425398742, "tokens_seen": 2433875968 }, { "epoch": 0.47, "learning_rate": 0.0005337859683155513, "loss": 1.2999, "theoretical_loss": 3.375693134186462, "tokens_seen": 2434400256 }, { "epoch": 0.47, "objective/train/advantage_avg": 0.005265907384455204, "objective/train/docs_used": 1372111, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.63374662399292, "objective/train/original_loss": 2.633746862411499, "objective/train/theoretical_loss": 3.3756348419046387, "objective/train/tokens_used": 793783776, "objective/train/value_avg": -0.0130462646484375, "objective/train/value_loss": 0.0020670429803431034, "objective/train/value_max": -0.0003077983856201172, "objective/train/value_min": -0.2144775390625, "objective/train/value_reward_corr": 0.16970695471766087, "objective/train/value_std": 0.0126190185546875, "objective/train/weight_avg": 1.0005367994308472, "objective/train/weighted_lm_loss": 2.635310173034668, "objective/train/weights_max": 1.0209332704544067, "objective/train/weights_min": 0.9198985695838928, "theoretical_loss": 3.3756348419046387, "tokens_seen": 2434924544 }, { "epoch": 0.47, "learning_rate": 0.000533462657613967, "loss": 1.3579, "theoretical_loss": 3.3756348419046387, "tokens_seen": 2434924544 }, { "epoch": 0.47, "learning_rate": 0.0005331393469123829, "loss": 1.3291, "theoretical_loss": 3.375576565686516, "tokens_seen": 2435448832 }, { "epoch": 0.47, "learning_rate": 0.0005328160362107985, "loss": 1.3728, "theoretical_loss": 3.3755183055242104, "tokens_seen": 2435973120 }, { "epoch": 0.47, "learning_rate": 0.0005324927255092143, "loss": 1.3141, "theoretical_loss": 3.375460061409844, "tokens_seen": 2436497408 }, { "epoch": 0.47, "objective/train/advantage_avg": 0.008567996323108673, "objective/train/docs_used": 1372668, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.8169753551483154, "objective/train/original_loss": 2.8169753551483154, "objective/train/theoretical_loss": 3.375452782023527, "objective/train/tokens_used": 795422176, "objective/train/value_avg": -0.0209197998046875, "objective/train/value_loss": 0.004357577301561832, "objective/train/value_max": -0.00046181678771972656, "objective/train/value_min": -0.8603515625, "objective/train/value_reward_corr": 0.5204789925156158, "objective/train/value_std": 0.0333251953125, "objective/train/weight_avg": 1.0008782148361206, "objective/train/weighted_lm_loss": 2.8196847438812256, "objective/train/weights_max": 1.0321465730667114, "objective/train/weights_min": 0.9101662039756775, "theoretical_loss": 3.375452782023527, "tokens_seen": 2436562944 }, { "epoch": 0.47, "learning_rate": 0.0005321694148076302, "loss": 1.3065, "theoretical_loss": 3.3754018333355447, "tokens_seen": 2437021696 }, { "epoch": 0.47, "learning_rate": 0.0005318461041060459, "loss": 1.2912, "theoretical_loss": 3.375343621293445, "tokens_seen": 2437545984 }, { "epoch": 0.47, "learning_rate": 0.0005315227934044618, "loss": 1.3434, "theoretical_loss": 3.375285425275685, "tokens_seen": 2438070272 }, { "epoch": 0.47, "objective/train/advantage_avg": 0.004774549510329962, "objective/train/docs_used": 1374127, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.7653377056121826, "objective/train/original_loss": 2.7653377056121826, "objective/train/theoretical_loss": 3.3752708787741277, "objective/train/tokens_used": 797060576, "objective/train/value_avg": -0.0143280029296875, "objective/train/value_loss": 0.0010982435196638107, "objective/train/value_max": -0.0005440711975097656, "objective/train/value_min": -0.95263671875, "objective/train/value_reward_corr": 0.5452445685110965, "objective/train/value_std": 0.0193634033203125, "objective/train/weight_avg": 1.0004829168319702, "objective/train/weighted_lm_loss": 2.7676191329956055, "objective/train/weights_max": 1.0192440748214722, "objective/train/weights_min": 0.9116982817649841, "theoretical_loss": 3.3752708787741277, "tokens_seen": 2438201344 }, { "epoch": 0.47, "learning_rate": 0.0005311994827028774, "loss": 1.3164, "theoretical_loss": 3.375227245274409, "tokens_seen": 2438594560 }, { "epoch": 0.47, "learning_rate": 0.0005308761720012932, "loss": 1.3032, "theoretical_loss": 3.375169081281766, "tokens_seen": 2439118848 }, { "epoch": 0.47, "learning_rate": 0.0005305528612997091, "loss": 1.3267, "theoretical_loss": 3.375110933289912, "tokens_seen": 2439643136 }, { "epoch": 0.47, "objective/train/advantage_avg": 0.005783334840089083, "objective/train/docs_used": 1374810, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.5752549171447754, "objective/train/original_loss": 2.5752546787261963, "objective/train/theoretical_loss": 3.3750891319165697, "objective/train/tokens_used": 798698976, "objective/train/value_avg": -0.01477813720703125, "objective/train/value_loss": 0.0005102395080029964, "objective/train/value_max": -0.0005173683166503906, "objective/train/value_min": -0.207275390625, "objective/train/value_reward_corr": 0.5772229695002112, "objective/train/value_std": 0.015594482421875, "objective/train/weight_avg": 1.000580906867981, "objective/train/weighted_lm_loss": 2.5771842002868652, "objective/train/weights_max": 1.0198713541030884, "objective/train/weights_min": 0.9560080170631409, "theoretical_loss": 3.3750891319165697, "tokens_seen": 2439839744 }, { "epoch": 0.48, "learning_rate": 0.0005302295505981248, "loss": 1.3397, "theoretical_loss": 3.375052801291008, "tokens_seen": 2440167424 }, { "epoch": 0.48, "learning_rate": 0.0005299062398965406, "loss": 1.318, "theoretical_loss": 3.3749946852772204, "tokens_seen": 2440691712 }, { "epoch": 0.48, "learning_rate": 0.0005295829291949563, "loss": 1.3052, "theoretical_loss": 3.3749365852407216, "tokens_seen": 2441216000 }, { "epoch": 0.48, "objective/train/advantage_avg": -0.006857650354504585, "objective/train/docs_used": 1375482, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.729423999786377, "objective/train/original_loss": 2.729424238204956, "objective/train/theoretical_loss": 3.3749075412115106, "objective/train/tokens_used": 800337376, "objective/train/value_avg": -0.0419921875, "objective/train/value_loss": 0.009095266461372375, "objective/train/value_max": -0.0006165504455566406, "objective/train/value_min": -0.97216796875, "objective/train/value_reward_corr": 0.6128105606003763, "objective/train/value_std": 0.07342529296875, "objective/train/weight_avg": 0.999359130859375, "objective/train/weighted_lm_loss": 2.728484869003296, "objective/train/weights_max": 1.051591396331787, "objective/train/weights_min": 0.9076107740402222, "theoretical_loss": 3.3749075412115106, "tokens_seen": 2441478144 }, { "epoch": 0.48, "learning_rate": 0.0005292596184933721, "loss": 1.3138, "theoretical_loss": 3.3748785011736886, "tokens_seen": 2441740288 }, { "epoch": 0.48, "learning_rate": 0.000528936307791788, "loss": 1.3134, "theoretical_loss": 3.3748204330683054, "tokens_seen": 2442264576 }, { "epoch": 0.48, "learning_rate": 0.0005286129970902037, "loss": 1.3165, "theoretical_loss": 3.374762380916759, "tokens_seen": 2442788864 }, { "epoch": 0.48, "objective/train/advantage_avg": 0.0046464307233691216, "objective/train/docs_used": 1376516, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.6790828704833984, "objective/train/original_loss": 2.6790828704833984, "objective/train/theoretical_loss": 3.374726106420133, "objective/train/tokens_used": 801975776, "objective/train/value_avg": -0.02154541015625, "objective/train/value_loss": 0.0031734169460833073, "objective/train/value_max": -0.0004458427429199219, "objective/train/value_min": -0.67138671875, "objective/train/value_reward_corr": 0.5248083914993177, "objective/train/value_std": 0.031341552734375, "objective/train/weight_avg": 1.0004802942276, "objective/train/weighted_lm_loss": 2.6806788444519043, "objective/train/weights_max": 1.0375983715057373, "objective/train/weights_min": 0.9201204180717468, "theoretical_loss": 3.374726106420133, "tokens_seen": 2443116544 }, { "epoch": 0.48, "learning_rate": 0.0005282896863886195, "loss": 1.2916, "theoretical_loss": 3.3747043447112457, "tokens_seen": 2443313152 }, { "epoch": 0.48, "learning_rate": 0.0005279663756870352, "loss": 1.339, "theoretical_loss": 3.374646324443963, "tokens_seen": 2443837440 }, { "epoch": 0.48, "learning_rate": 0.000527643064985451, "loss": 1.3252, "theoretical_loss": 3.374588320107117, "tokens_seen": 2444361728 }, { "epoch": 0.48, "objective/train/advantage_avg": 0.0017844048561528325, "objective/train/docs_used": 1376516, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.9802141189575195, "objective/train/original_loss": 2.9802141189575195, "objective/train/theoretical_loss": 3.3745448273041454, "objective/train/tokens_used": 803614176, "objective/train/value_avg": -0.02154541015625, "objective/train/value_loss": 0.005925813689827919, "objective/train/value_max": -0.0011472702026367188, "objective/train/value_min": -0.90185546875, "objective/train/value_reward_corr": 0.44103611131450204, "objective/train/value_std": 0.037139892578125, "objective/train/weight_avg": 1.0002076625823975, "objective/train/weighted_lm_loss": 2.980848789215088, "objective/train/weights_max": 1.0507705211639404, "objective/train/weights_min": 0.9104285836219788, "theoretical_loss": 3.3745448273041454, "tokens_seen": 2444754944 }, { "epoch": 0.48, "learning_rate": 0.0005273197542838668, "loss": 1.3571, "theoretical_loss": 3.374530331692918, "tokens_seen": 2444886016 }, { "epoch": 0.48, "learning_rate": 0.0005269964435822826, "loss": 1.3448, "theoretical_loss": 3.3744723591935823, "tokens_seen": 2445410304 }, { "epoch": 0.48, "learning_rate": 0.0005266731328806984, "loss": 1.3463, "theoretical_loss": 3.3744144026013307, "tokens_seen": 2445934592 }, { "epoch": 0.48, "objective/train/advantage_avg": 0.0044324262998998165, "objective/train/docs_used": 1377582, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.899829864501953, "objective/train/original_loss": 2.899829626083374, "objective/train/theoretical_loss": 3.3743637036257805, "objective/train/tokens_used": 805252576, "objective/train/value_avg": -0.0128173828125, "objective/train/value_loss": 0.0010117357596755028, "objective/train/value_max": -0.0005092620849609375, "objective/train/value_min": -0.42333984375, "objective/train/value_reward_corr": 0.5266619920659524, "objective/train/value_std": 0.01479339599609375, "objective/train/weight_avg": 1.000448226928711, "objective/train/weighted_lm_loss": 2.901888132095337, "objective/train/weights_max": 1.012972354888916, "objective/train/weights_min": 0.9341378808021545, "theoretical_loss": 3.3743637036257805, "tokens_seen": 2446393344 }, { "epoch": 0.48, "learning_rate": 0.0005263498221791141, "loss": 1.4025, "theoretical_loss": 3.3743564619083912, "tokens_seen": 2446458880 }, { "epoch": 0.48, "learning_rate": 0.0005260265114775299, "loss": 1.366, "theoretical_loss": 3.3742985371069953, "tokens_seen": 2446983168 }, { "epoch": 0.48, "learning_rate": 0.0005257032007759457, "loss": 1.3872, "theoretical_loss": 3.3742406281893813, "tokens_seen": 2447507456 }, { "epoch": 0.48, "objective/train/advantage_avg": 0.005415008403360844, "objective/train/docs_used": 1378304, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.6257128715515137, "objective/train/original_loss": 2.6257123947143555, "objective/train/theoretical_loss": 3.374182735147792, "objective/train/tokens_used": 806890976, "objective/train/value_avg": -0.0117950439453125, "objective/train/value_loss": 0.0014556942041963339, "objective/train/value_max": -0.0006666183471679688, "objective/train/value_min": -0.95751953125, "objective/train/value_reward_corr": 0.4263254990175743, "objective/train/value_std": 0.0212860107421875, "objective/train/weight_avg": 1.0005487203598022, "objective/train/weighted_lm_loss": 2.627268075942993, "objective/train/weights_max": 1.0678620338439941, "objective/train/weights_min": 0.9491945505142212, "theoretical_loss": 3.374182735147792, "tokens_seen": 2448031744 }, { "epoch": 0.48, "learning_rate": 0.0005253798900743615, "loss": 1.3757, "theoretical_loss": 3.374182735147792, "tokens_seen": 2448031744 }, { "epoch": 0.48, "learning_rate": 0.0005250565793727773, "loss": 1.3099, "theoretical_loss": 3.3741248579744774, "tokens_seen": 2448556032 }, { "epoch": 0.48, "learning_rate": 0.0005247332686711929, "loss": 1.3627, "theoretical_loss": 3.37406699666169, "tokens_seen": 2449080320 }, { "epoch": 0.48, "learning_rate": 0.0005244099579696088, "loss": 1.3677, "theoretical_loss": 3.374009151201691, "tokens_seen": 2449604608 }, { "epoch": 0.48, "objective/train/advantage_avg": 0.006765672937035561, "objective/train/docs_used": 1378955, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.708732843399048, "objective/train/original_loss": 2.708732843399048, "objective/train/theoretical_loss": 3.374001921633455, "objective/train/tokens_used": 808529376, "objective/train/value_avg": -0.018798828125, "objective/train/value_loss": 0.002449575113132596, "objective/train/value_max": -0.0005011558532714844, "objective/train/value_min": -0.810546875, "objective/train/value_reward_corr": 0.518196635092907, "objective/train/value_std": 0.03662109375, "objective/train/weight_avg": 1.0006886720657349, "objective/train/weighted_lm_loss": 2.710268974304199, "objective/train/weights_max": 1.0720124244689941, "objective/train/weights_min": 0.9471514225006104, "theoretical_loss": 3.374001921633455, "tokens_seen": 2449670144 }, { "epoch": 0.48, "learning_rate": 0.0005240866472680245, "loss": 1.3535, "theoretical_loss": 3.373951321586745, "tokens_seen": 2450128896 }, { "epoch": 0.48, "learning_rate": 0.0005237633365664404, "loss": 1.3273, "theoretical_loss": 3.3738935078091226, "tokens_seen": 2450653184 }, { "epoch": 0.48, "learning_rate": 0.0005234400258648562, "loss": 1.3546, "theoretical_loss": 3.373835709861099, "tokens_seen": 2451177472 }, { "epoch": 0.48, "objective/train/advantage_avg": 0.0070000626146793365, "objective/train/docs_used": 1380357, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.6046032905578613, "objective/train/original_loss": 2.6046035289764404, "objective/train/theoretical_loss": 3.3738212628465636, "objective/train/tokens_used": 810167776, "objective/train/value_avg": -0.033721923828125, "objective/train/value_loss": 0.009149467572569847, "objective/train/value_max": -0.00015723705291748047, "objective/train/value_min": -0.9931640625, "objective/train/value_reward_corr": 0.6487367934990022, "objective/train/value_std": 0.08941650390625, "objective/train/weight_avg": 1.0007450580596924, "objective/train/weighted_lm_loss": 2.6065127849578857, "objective/train/weights_max": 1.086774468421936, "objective/train/weights_min": 0.9066487550735474, "theoretical_loss": 3.3738212628465636, "tokens_seen": 2451308544 }, { "epoch": 0.48, "learning_rate": 0.0005231167151632718, "loss": 1.3659, "theoretical_loss": 3.3737779277349564, "tokens_seen": 2451701760 }, { "epoch": 0.48, "learning_rate": 0.0005227934044616877, "loss": 1.3076, "theoretical_loss": 3.3737201614229813, "tokens_seen": 2452226048 }, { "epoch": 0.48, "learning_rate": 0.0005224700937601034, "loss": 1.3318, "theoretical_loss": 3.3736624109174658, "tokens_seen": 2452750336 }, { "epoch": 0.48, "objective/train/advantage_avg": -0.0032640735153108835, "objective/train/docs_used": 1380887, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.3641793727874756, "objective/train/original_loss": 2.3641791343688965, "objective/train/theoretical_loss": 3.3736407585514283, "objective/train/tokens_used": 811806176, "objective/train/value_avg": -0.0291900634765625, "objective/train/value_loss": 0.010588186793029308, "objective/train/value_max": -0.0005011558532714844, "objective/train/value_min": -0.990234375, "objective/train/value_reward_corr": 0.6574202042058299, "objective/train/value_std": 0.0921630859375, "objective/train/weight_avg": 0.9997255206108093, "objective/train/weighted_lm_loss": 2.3632636070251465, "objective/train/weights_max": 1.0772804021835327, "objective/train/weights_min": 0.9056357741355896, "theoretical_loss": 3.3736407585514283, "tokens_seen": 2452946944 }, { "epoch": 0.48, "learning_rate": 0.0005221467830585193, "loss": 1.3431, "theoretical_loss": 3.3736046762107077, "tokens_seen": 2453274624 }, { "epoch": 0.48, "learning_rate": 0.0005218234723569351, "loss": 1.2883, "theoretical_loss": 3.37354695729501, "tokens_seen": 2453798912 }, { "epoch": 0.48, "learning_rate": 0.0005215001616553508, "loss": 1.3466, "theoretical_loss": 3.373489254162681, "tokens_seen": 2454323200 }, { "epoch": 0.48, "objective/train/advantage_avg": 0.005812616087496281, "objective/train/docs_used": 1382209, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.9646966457366943, "objective/train/original_loss": 2.9646966457366943, "objective/train/theoretical_loss": 3.3734604085128774, "objective/train/tokens_used": 813444576, "objective/train/value_avg": -0.0228729248046875, "objective/train/value_loss": 0.007100636139512062, "objective/train/value_max": -0.0004546642303466797, "objective/train/value_min": -0.9853515625, "objective/train/value_reward_corr": 0.5571542644649518, "objective/train/value_std": 0.0548095703125, "objective/train/weight_avg": 1.0006160736083984, "objective/train/weighted_lm_loss": 2.966613531112671, "objective/train/weights_max": 1.0963042974472046, "objective/train/weights_min": 0.9108531475067139, "theoretical_loss": 3.3734604085128774, "tokens_seen": 2454585344 }, { "epoch": 0.48, "learning_rate": 0.0005211768509537666, "loss": 1.348, "theoretical_loss": 3.373431566806034, "tokens_seen": 2454847488 }, { "epoch": 0.48, "learning_rate": 0.0005208535402521823, "loss": 1.3383, "theoretical_loss": 3.373373895217389, "tokens_seen": 2455371776 }, { "epoch": 0.48, "learning_rate": 0.0005205302295505981, "loss": 1.3336, "theoretical_loss": 3.37331623938907, "tokens_seen": 2455896064 }, { "epoch": 0.48, "objective/train/advantage_avg": 0.0031331642530858517, "objective/train/docs_used": 1382688, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.7433459758758545, "objective/train/original_loss": 2.7433457374572754, "objective/train/theoretical_loss": 3.373280212496253, "objective/train/tokens_used": 815082976, "objective/train/value_avg": -0.029144287109375, "objective/train/value_loss": 0.005297439638525248, "objective/train/value_max": -0.0007581710815429688, "objective/train/value_min": -0.9931640625, "objective/train/value_reward_corr": 0.8379783798161746, "objective/train/value_std": 0.09735107421875, "objective/train/weight_avg": 1.000339388847351, "objective/train/weighted_lm_loss": 2.745425224304199, "objective/train/weights_max": 1.05418860912323, "objective/train/weights_min": 0.9094842076301575, "theoretical_loss": 3.373280212496253, "tokens_seen": 2456223744 }, { "epoch": 0.49, "learning_rate": 0.000520206918849014, "loss": 1.3568, "theoretical_loss": 3.3732585993134068, "tokens_seen": 2456420352 }, { "epoch": 0.49, "learning_rate": 0.0005198836081474297, "loss": 1.3213, "theoretical_loss": 3.373200974982735, "tokens_seen": 2456944640 }, { "epoch": 0.49, "learning_rate": 0.0005195602974458455, "loss": 1.3176, "theoretical_loss": 3.3731433663893955, "tokens_seen": 2457468928 }, { "epoch": 0.49, "objective/train/advantage_avg": 0.007794058416038752, "objective/train/docs_used": 1383732, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.710420846939087, "objective/train/original_loss": 2.710420608520508, "objective/train/theoretical_loss": 3.3731001702674104, "objective/train/tokens_used": 816721376, "objective/train/value_avg": -0.0139923095703125, "objective/train/value_loss": 0.00042020855471491814, "objective/train/value_max": -0.0007181167602539062, "objective/train/value_min": -0.256103515625, "objective/train/value_reward_corr": 0.3165461598594619, "objective/train/value_std": 0.0144500732421875, "objective/train/weight_avg": 1.0007814168930054, "objective/train/weighted_lm_loss": 2.713406562805176, "objective/train/weights_max": 1.0238827466964722, "objective/train/weights_min": 0.9807237386703491, "theoretical_loss": 3.3731001702674104, "tokens_seen": 2457862144 }, { "epoch": 0.49, "learning_rate": 0.0005192369867442612, "loss": 1.3298, "theoretical_loss": 3.373085773525734, "tokens_seen": 2457993216 }, { "epoch": 0.49, "learning_rate": 0.000518913676042677, "loss": 1.3232, "theoretical_loss": 3.373028196384101, "tokens_seen": 2458517504 }, { "epoch": 0.49, "learning_rate": 0.0005185903653410929, "loss": 1.2893, "theoretical_loss": 3.3729706349568547, "tokens_seen": 2459041792 }, { "epoch": 0.49, "objective/train/advantage_avg": 0.008971980772912502, "objective/train/docs_used": 1384412, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.415410041809082, "objective/train/original_loss": 2.415410041809082, "objective/train/theoretical_loss": 3.372920281592717, "objective/train/tokens_used": 818359776, "objective/train/value_avg": -0.0205535888671875, "objective/train/value_loss": 0.001228276756592095, "objective/train/value_max": -0.0007948875427246094, "objective/train/value_min": -0.443115234375, "objective/train/value_reward_corr": 0.47696475369497765, "objective/train/value_std": 0.028289794921875, "objective/train/weight_avg": 1.0009032487869263, "objective/train/weighted_lm_loss": 2.418581008911133, "objective/train/weights_max": 1.0449398756027222, "objective/train/weights_min": 0.9107033610343933, "theoretical_loss": 3.372920281592717, "tokens_seen": 2459500544 }, { "epoch": 0.49, "learning_rate": 0.0005182670546395086, "loss": 1.3112, "theoretical_loss": 3.3729130892363566, "tokens_seen": 2459566080 }, { "epoch": 0.49, "learning_rate": 0.0005179437439379243, "loss": 1.3043, "theoretical_loss": 3.3728555592149734, "tokens_seen": 2460090368 }, { "epoch": 0.49, "learning_rate": 0.0005176204332363401, "loss": 1.2783, "theoretical_loss": 3.3727980448850783, "tokens_seen": 2460614656 }, { "epoch": 0.49, "objective/train/advantage_avg": -0.002125292783603072, "objective/train/docs_used": 1385122, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.5709095001220703, "objective/train/original_loss": 2.570909023284912, "objective/train/theoretical_loss": 3.37274054623905, "objective/train/tokens_used": 819998176, "objective/train/value_avg": -0.0244293212890625, "objective/train/value_loss": 0.00414518965408206, "objective/train/value_max": -0.0003814697265625, "objective/train/value_min": -0.98486328125, "objective/train/value_reward_corr": 0.5757200914143992, "objective/train/value_std": 0.052703857421875, "objective/train/weight_avg": 0.9998080134391785, "objective/train/weighted_lm_loss": 2.5714399814605713, "objective/train/weights_max": 1.070515751838684, "objective/train/weights_min": 0.9211212396621704, "theoretical_loss": 3.37274054623905, "tokens_seen": 2461138944 }, { "epoch": 0.49, "learning_rate": 0.0005172971225347559, "loss": 1.3407, "theoretical_loss": 3.37274054623905, "tokens_seen": 2461138944 }, { "epoch": 0.49, "learning_rate": 0.0005169738118331718, "loss": 1.2951, "theoretical_loss": 3.3726830632692706, "tokens_seen": 2461663232 }, { "epoch": 0.49, "learning_rate": 0.0005166505011315875, "loss": 1.3282, "theoretical_loss": 3.37262559596813, "tokens_seen": 2462187520 }, { "epoch": 0.49, "learning_rate": 0.0005163271904300032, "loss": 1.336, "theoretical_loss": 3.3725681443280213, "tokens_seen": 2462711808 }, { "epoch": 0.49, "objective/train/advantage_avg": 0.0017531848279759288, "objective/train/docs_used": 1386383, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.8202943801879883, "objective/train/original_loss": 2.8202943801879883, "objective/train/theoretical_loss": 3.3725609639737955, "objective/train/tokens_used": 821636576, "objective/train/value_avg": -0.03143310546875, "objective/train/value_loss": 0.0064402371644973755, "objective/train/value_max": -0.0006189346313476562, "objective/train/value_min": -0.9951171875, "objective/train/value_reward_corr": 0.7548891013415915, "objective/train/value_std": 0.07708740234375, "objective/train/weight_avg": 1.0002071857452393, "objective/train/weighted_lm_loss": 2.8212404251098633, "objective/train/weights_max": 1.0591374635696411, "objective/train/weights_min": 0.9088289737701416, "theoretical_loss": 3.3725609639737955, "tokens_seen": 2462777344 }, { "epoch": 0.49, "learning_rate": 0.000516003879728419, "loss": 1.3361, "theoretical_loss": 3.3725107083413444, "tokens_seen": 2463236096 }, { "epoch": 0.49, "learning_rate": 0.0005156805690268348, "loss": 1.375, "theoretical_loss": 3.3724532880005036, "tokens_seen": 2463760384 }, { "epoch": 0.49, "learning_rate": 0.0005153572583252505, "loss": 1.3649, "theoretical_loss": 3.372395883297909, "tokens_seen": 2464284672 }, { "epoch": 0.49, "objective/train/advantage_avg": 0.011689224280416965, "objective/train/docs_used": 1387153, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.71246337890625, "objective/train/original_loss": 2.71246337890625, "objective/train/theoretical_loss": 3.3723815345648473, "objective/train/tokens_used": 823274976, "objective/train/value_avg": -0.0216217041015625, "objective/train/value_loss": 0.002141159726306796, "objective/train/value_max": -0.000743865966796875, "objective/train/value_min": -0.91259765625, "objective/train/value_reward_corr": 0.5781416028695933, "objective/train/value_std": 0.044708251953125, "objective/train/weight_avg": 1.001179575920105, "objective/train/weighted_lm_loss": 2.716693878173828, "objective/train/weights_max": 1.036365270614624, "objective/train/weights_min": 0.9064832925796509, "theoretical_loss": 3.3723815345648473, "tokens_seen": 2464415744 }, { "epoch": 0.49, "learning_rate": 0.0005150339476236664, "loss": 1.3502, "theoretical_loss": 3.3723384942259758, "tokens_seen": 2464808960 }, { "epoch": 0.49, "learning_rate": 0.0005147106369220821, "loss": 1.3188, "theoretical_loss": 3.372281120777125, "tokens_seen": 2465333248 }, { "epoch": 0.49, "learning_rate": 0.0005143873262204979, "loss": 1.3175, "theoretical_loss": 3.372223762943781, "tokens_seen": 2465857536 }, { "epoch": 0.49, "objective/train/advantage_avg": 2.27491636906052e-05, "objective/train/docs_used": 1388132, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.7850615978240967, "objective/train/original_loss": 2.785061836242676, "objective/train/theoretical_loss": 3.3722022577806054, "objective/train/tokens_used": 824913376, "objective/train/value_avg": -0.0162200927734375, "objective/train/value_loss": 0.0037456059362739325, "objective/train/value_max": -0.0003829002380371094, "objective/train/value_min": -0.97314453125, "objective/train/value_reward_corr": 0.517925134163488, "objective/train/value_std": 0.031036376953125, "objective/train/weight_avg": 1.0000207424163818, "objective/train/weighted_lm_loss": 2.7858150005340576, "objective/train/weights_max": 1.0493100881576538, "objective/train/weights_min": 0.9116849899291992, "theoretical_loss": 3.3722022577806054, "tokens_seen": 2466054144 }, { "epoch": 0.49, "learning_rate": 0.0005140640155189137, "loss": 1.3158, "theoretical_loss": 3.3721664207183766, "tokens_seen": 2466381824 }, { "epoch": 0.49, "learning_rate": 0.0005137407048173294, "loss": 1.3385, "theoretical_loss": 3.3721090940933465, "tokens_seen": 2466906112 }, { "epoch": 0.49, "learning_rate": 0.0005134173941157453, "loss": 1.3479, "theoretical_loss": 3.372051783061134, "tokens_seen": 2467430400 }, { "epoch": 0.49, "objective/train/advantage_avg": 0.0043800449930131435, "objective/train/docs_used": 1388592, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.6878554821014404, "objective/train/original_loss": 2.6878552436828613, "objective/train/theoretical_loss": 3.3720231333899733, "objective/train/tokens_used": 826551776, "objective/train/value_avg": -0.01178741455078125, "objective/train/value_loss": 0.0018184538930654526, "objective/train/value_max": -0.00040459632873535156, "objective/train/value_min": -0.7509765625, "objective/train/value_reward_corr": 0.4059384276984716, "objective/train/value_std": 0.01715087890625, "objective/train/weight_avg": 1.0004469156265259, "objective/train/weighted_lm_loss": 2.6891348361968994, "objective/train/weights_max": 1.019274353981018, "objective/train/weights_min": 0.9197314977645874, "theoretical_loss": 3.3720231333899733, "tokens_seen": 2467692544 }, { "epoch": 0.49, "learning_rate": 0.000513094083414161, "loss": 1.3353, "theoretical_loss": 3.3719944876141845, "tokens_seen": 2467954688 }, { "epoch": 0.49, "learning_rate": 0.0005127707727125768, "loss": 1.3412, "theoretical_loss": 3.371937207744951, "tokens_seen": 2468478976 }, { "epoch": 0.49, "learning_rate": 0.0005124474620109926, "loss": 1.3774, "theoretical_loss": 3.371879943445891, "tokens_seen": 2469003264 }, { "epoch": 0.49, "objective/train/advantage_avg": 0.010311514139175415, "objective/train/docs_used": 1390136, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.98128342628479, "objective/train/original_loss": 2.981283664703369, "objective/train/theoretical_loss": 3.3718441611623575, "objective/train/tokens_used": 828190176, "objective/train/value_avg": -0.0167083740234375, "objective/train/value_loss": 0.0026049059815704823, "objective/train/value_max": -0.0001971721649169922, "objective/train/value_min": -0.82177734375, "objective/train/value_reward_corr": 0.30726676575596046, "objective/train/value_std": 0.0273895263671875, "objective/train/weight_avg": 1.0010440349578857, "objective/train/weighted_lm_loss": 2.98502516746521, "objective/train/weights_max": 1.0834598541259766, "objective/train/weights_min": 0.9243125915527344, "theoretical_loss": 3.3718441611623575, "tokens_seen": 2469330944 }, { "epoch": 0.49, "learning_rate": 0.0005121241513094083, "loss": 1.3452, "theoretical_loss": 3.3718226947094667, "tokens_seen": 2469527552 }, { "epoch": 0.49, "learning_rate": 0.0005118008406078242, "loss": 1.3337, "theoretical_loss": 3.3717654615281463, "tokens_seen": 2470051840 }, { "epoch": 0.49, "learning_rate": 0.0005114775299062399, "loss": 1.3589, "theoretical_loss": 3.371708243894403, "tokens_seen": 2470576128 }, { "epoch": 0.49, "objective/train/advantage_avg": 0.004040425177663565, "objective/train/docs_used": 1390574, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.523491382598877, "objective/train/original_loss": 2.523491382598877, "objective/train/theoretical_loss": 3.3716653408676676, "objective/train/tokens_used": 829828576, "objective/train/value_avg": -0.011993408203125, "objective/train/value_loss": 0.0007679046248085797, "objective/train/value_max": -0.00042724609375, "objective/train/value_min": -0.368896484375, "objective/train/value_reward_corr": 0.2829475259311581, "objective/train/value_std": 0.01238250732421875, "objective/train/weight_avg": 1.0004078149795532, "objective/train/weighted_lm_loss": 2.5250120162963867, "objective/train/weights_max": 1.0199838876724243, "objective/train/weights_min": 0.9363457560539246, "theoretical_loss": 3.3716653408676676, "tokens_seen": 2470969344 }, { "epoch": 0.49, "learning_rate": 0.0005111542192046556, "loss": 1.3585, "theoretical_loss": 3.371651041800715, "tokens_seen": 2471100416 }, { "epoch": 0.49, "learning_rate": 0.0005108309085030715, "loss": 1.35, "theoretical_loss": 3.3715938552395657, "tokens_seen": 2471624704 }, { "epoch": 0.49, "learning_rate": 0.0005105075978014872, "loss": 1.3162, "theoretical_loss": 3.371536684203445, "tokens_seen": 2472148992 }, { "epoch": 0.49, "objective/train/advantage_avg": 0.009293721057474613, "objective/train/docs_used": 1391138, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 3.04416823387146, "objective/train/original_loss": 3.04416823387146, "objective/train/theoretical_loss": 3.3714866722763124, "objective/train/tokens_used": 831466976, "objective/train/value_avg": -0.0163421630859375, "objective/train/value_loss": 0.0015466674230992794, "objective/train/value_max": -0.0005211830139160156, "objective/train/value_min": -0.98828125, "objective/train/value_reward_corr": 0.6082766188556661, "objective/train/value_std": 0.03143310546875, "objective/train/weight_avg": 1.0009369850158691, "objective/train/weighted_lm_loss": 3.047593355178833, "objective/train/weights_max": 1.0698031187057495, "objective/train/weights_min": 0.9070869088172913, "theoretical_loss": 3.3714866722763124, "tokens_seen": 2472607744 }, { "epoch": 0.5, "learning_rate": 0.0005101842870999031, "loss": 1.3703, "theoretical_loss": 3.3714795286848456, "tokens_seen": 2472673280 }, { "epoch": 0.5, "learning_rate": 0.0005098609763983188, "loss": 1.3286, "theoretical_loss": 3.3714223886762675, "tokens_seen": 2473197568 }, { "epoch": 0.5, "learning_rate": 0.0005095376656967345, "loss": 1.3517, "theoretical_loss": 3.3713652641702154, "tokens_seen": 2473721856 }, { "epoch": 0.5, "objective/train/advantage_avg": 0.0060114567168056965, "objective/train/docs_used": 1392460, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.201882839202881, "objective/train/original_loss": 2.201882839202881, "objective/train/theoretical_loss": 3.371308155159199, "objective/train/tokens_used": 833105376, "objective/train/value_avg": -0.014190673828125, "objective/train/value_loss": 0.0012705207336694002, "objective/train/value_max": -0.0004372596740722656, "objective/train/value_min": -0.43017578125, "objective/train/value_reward_corr": 0.3640419353137629, "objective/train/value_std": 0.017669677734375, "objective/train/weight_avg": 1.0006074905395508, "objective/train/weighted_lm_loss": 2.2028799057006836, "objective/train/weights_max": 1.0399514436721802, "objective/train/weights_min": 0.956592857837677, "theoretical_loss": 3.371308155159199, "tokens_seen": 2474246144 }, { "epoch": 0.5, "learning_rate": 0.0005092143549951504, "loss": 1.3179, "theoretical_loss": 3.371308155159199, "tokens_seen": 2474246144 }, { "epoch": 0.5, "learning_rate": 0.0005088910442935661, "loss": 1.3508, "theoretical_loss": 3.371251061635733, "tokens_seen": 2474770432 }, { "epoch": 0.5, "learning_rate": 0.0005085677335919819, "loss": 1.3332, "theoretical_loss": 3.3711939835923372, "tokens_seen": 2475294720 }, { "epoch": 0.5, "learning_rate": 0.0005082444228903976, "loss": 1.3718, "theoretical_loss": 3.3711369210215376, "tokens_seen": 2475819008 }, { "epoch": 0.5, "objective/train/advantage_avg": 0.008621352724730968, "objective/train/docs_used": 1393060, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.6396567821502686, "objective/train/original_loss": 2.6396570205688477, "objective/train/theoretical_loss": 3.3711297892877328, "objective/train/tokens_used": 834743776, "objective/train/value_avg": -0.01922607421875, "objective/train/value_loss": 0.001905481331050396, "objective/train/value_max": -0.0004711151123046875, "objective/train/value_min": -0.9453125, "objective/train/value_reward_corr": 0.5863036424324711, "objective/train/value_std": 0.04290771484375, "objective/train/weight_avg": 1.0008716583251953, "objective/train/weighted_lm_loss": 2.6428630352020264, "objective/train/weights_max": 1.0785521268844604, "objective/train/weights_min": 0.9158862233161926, "theoretical_loss": 3.3711297892877328, "tokens_seen": 2475884544 }, { "epoch": 0.5, "learning_rate": 0.0005079211121888134, "loss": 1.3096, "theoretical_loss": 3.3710798739158645, "tokens_seen": 2476343296 }, { "epoch": 0.5, "learning_rate": 0.0005075978014872293, "loss": 1.304, "theoretical_loss": 3.3710228422678536, "tokens_seen": 2476867584 }, { "epoch": 0.5, "learning_rate": 0.000507274490785645, "loss": 1.3509, "theoretical_loss": 3.3709658260700457, "tokens_seen": 2477391872 }, { "epoch": 0.5, "objective/train/advantage_avg": 0.008843754418194294, "objective/train/docs_used": 1394031, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.577333688735962, "objective/train/original_loss": 2.577333450317383, "objective/train/theoretical_loss": 3.370951574433814, "objective/train/tokens_used": 836382176, "objective/train/value_avg": -0.0167236328125, "objective/train/value_loss": 0.0017950913170352578, "objective/train/value_max": -0.00018668174743652344, "objective/train/value_min": -0.93994140625, "objective/train/value_reward_corr": 0.6088073898226278, "objective/train/value_std": 0.0341796875, "objective/train/weight_avg": 1.000893235206604, "objective/train/weighted_lm_loss": 2.579998016357422, "objective/train/weights_max": 1.0588659048080444, "objective/train/weights_min": 0.9073300361633301, "theoretical_loss": 3.370951574433814, "tokens_seen": 2477522944 }, { "epoch": 0.5, "learning_rate": 0.0005069511800840608, "loss": 1.3018, "theoretical_loss": 3.370908825314987, "tokens_seen": 2477916160 }, { "epoch": 0.5, "learning_rate": 0.0005066278693824767, "loss": 1.3324, "theoretical_loss": 3.3708518399952285, "tokens_seen": 2478440448 }, { "epoch": 0.5, "learning_rate": 0.0005063045586808923, "loss": 1.3368, "theoretical_loss": 3.370794870103327, "tokens_seen": 2478964736 }, { "epoch": 0.5, "objective/train/advantage_avg": 0.0037234060000628233, "objective/train/docs_used": 1394307, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.8249852657318115, "objective/train/original_loss": 2.8249852657318115, "objective/train/theoretical_loss": 3.3707735103698395, "objective/train/tokens_used": 838020576, "objective/train/value_avg": -0.01441192626953125, "objective/train/value_loss": 0.002460404299199581, "objective/train/value_max": -0.0002694129943847656, "objective/train/value_min": -0.9453125, "objective/train/value_reward_corr": 0.49779915938313585, "objective/train/value_std": 0.022125244140625, "objective/train/weight_avg": 1.0003844499588013, "objective/train/weighted_lm_loss": 2.826305627822876, "objective/train/weights_max": 1.027646780014038, "objective/train/weights_min": 0.9165175557136536, "theoretical_loss": 3.3707735103698395, "tokens_seen": 2479161344 }, { "epoch": 0.5, "learning_rate": 0.0005059812479793081, "loss": 1.3561, "theoretical_loss": 3.3707379156318433, "tokens_seen": 2479489024 }, { "epoch": 0.5, "learning_rate": 0.0005056579372777239, "loss": 1.3255, "theoretical_loss": 3.370680976573345, "tokens_seen": 2480013312 }, { "epoch": 0.5, "learning_rate": 0.0005053346265761397, "loss": 1.368, "theoretical_loss": 3.370624052920404, "tokens_seen": 2480537600 }, { "debugging/Self-BLEU-5": 0.47238059473412053, "debugging/distinct-1-grams": 0.7926839475131697, "debugging/distinct-2-grams": 0.968383733577091, "debugging/entropy-1-grams": 5.825872253920538, "debugging/entropy-2-grams": 6.6945723419556975, "debugging/length": 497.72727272727275, "debugging/num_segments": 11, "debugging/raw_token_scores_avg": 0.02038160152733326, "debugging/raw_token_scores_std": 0.10787505656480789, "epoch": 0.5, "objective/train/advantage_avg": 0.004433360416442156, "objective/train/docs_used": 1395647, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.7281088829040527, "objective/train/original_loss": 2.7281081676483154, "objective/train/theoretical_loss": 3.3705955968686974, "objective/train/tokens_used": 839658976, "objective/train/value_avg": -0.0247650146484375, "objective/train/value_loss": 0.005684974603354931, "objective/train/value_max": -0.0007977485656738281, "objective/train/value_min": -0.99609375, "objective/train/value_reward_corr": 0.7188714026836127, "objective/train/value_std": 0.0693359375, "objective/train/weight_avg": 1.0004712343215942, "objective/train/weighted_lm_loss": 2.7299394607543945, "objective/train/weights_max": 1.076916217803955, "objective/train/weights_min": 0.9074228405952454, "theoretical_loss": 3.3705955968686974, "tokens_seen": 2480799744 }, { "epoch": 0.5, "learning_rate": 0.0005050113158745556, "loss": 1.3442, "theoretical_loss": 3.3705671446655967, "tokens_seen": 2481061888 }, { "epoch": 0.5, "learning_rate": 0.0005046880051729712, "loss": 1.3766, "theoretical_loss": 3.3705102518015053, "tokens_seen": 2481586176 }, { "epoch": 0.5, "learning_rate": 0.000504364694471387, "loss": 1.3398, "theoretical_loss": 3.370453374320718, "tokens_seen": 2482110464 }, { "epoch": 0.5, "objective/train/advantage_avg": 0.006826592609286308, "objective/train/docs_used": 1396325, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.837110757827759, "objective/train/original_loss": 2.837110757827759, "objective/train/theoretical_loss": 3.3704178337037676, "objective/train/tokens_used": 841297376, "objective/train/value_avg": -0.016876220703125, "objective/train/value_loss": 0.002940003527328372, "objective/train/value_max": -0.0004858970642089844, "objective/train/value_min": -0.97412109375, "objective/train/value_reward_corr": 0.48369380983009314, "objective/train/value_std": 0.03399658203125, "objective/train/weight_avg": 1.000697135925293, "objective/train/weighted_lm_loss": 2.8392491340637207, "objective/train/weights_max": 1.0463804006576538, "objective/train/weights_min": 0.9174717664718628, "theoretical_loss": 3.3704178337037676, "tokens_seen": 2482438144 }, { "epoch": 0.5, "learning_rate": 0.0005040413837698028, "loss": 1.3422, "theoretical_loss": 3.3703965122158257, "tokens_seen": 2482634752 }, { "epoch": 0.5, "learning_rate": 0.0005037180730682186, "loss": 1.3414, "theoretical_loss": 3.3703396654794275, "tokens_seen": 2483159040 }, { "epoch": 0.5, "learning_rate": 0.0005033947623666344, "loss": 1.3281, "theoretical_loss": 3.3702828341041258, "tokens_seen": 2483683328 }, { "epoch": 0.5, "objective/train/advantage_avg": 0.004076053854078054, "objective/train/docs_used": 1397604, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.6042885780334473, "objective/train/original_loss": 2.6042885780334473, "objective/train/theoretical_loss": 3.3702402206489213, "objective/train/tokens_used": 842935776, "objective/train/value_avg": -0.01219940185546875, "objective/train/value_loss": 0.003340983297675848, "objective/train/value_max": -0.0002512931823730469, "objective/train/value_min": -0.9921875, "objective/train/value_reward_corr": 0.5588898790324323, "objective/train/value_std": 0.03338623046875, "objective/train/weight_avg": 1.000423789024353, "objective/train/weighted_lm_loss": 2.6054391860961914, "objective/train/weights_max": 1.0386254787445068, "objective/train/weights_min": 0.9064955711364746, "theoretical_loss": 3.3702402206489213, "tokens_seen": 2484076544 }, { "epoch": 0.5, "learning_rate": 0.0005030714516650501, "loss": 1.2995, "theoretical_loss": 3.3702260180825276, "tokens_seen": 2484207616 }, { "epoch": 0.5, "learning_rate": 0.0005027481409634659, "loss": 1.3061, "theoretical_loss": 3.3701692174072466, "tokens_seen": 2484731904 }, { "epoch": 0.5, "learning_rate": 0.0005024248302618817, "loss": 1.3362, "theoretical_loss": 3.3701124320709006, "tokens_seen": 2485256192 }, { "epoch": 0.5, "objective/train/advantage_avg": 0.0020429377909749746, "objective/train/docs_used": 1398115, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.1889450550079346, "objective/train/original_loss": 2.1889450550079346, "objective/train/theoretical_loss": 3.3700627574785185, "objective/train/tokens_used": 844574176, "objective/train/value_avg": -0.020904541015625, "objective/train/value_loss": 0.005117880646139383, "objective/train/value_max": -0.0005273818969726562, "objective/train/value_min": -0.876953125, "objective/train/value_reward_corr": 0.49713211714656314, "objective/train/value_std": 0.03814697265625, "objective/train/weight_avg": 1.0002294778823853, "objective/train/weighted_lm_loss": 2.189232587814331, "objective/train/weights_max": 1.0520236492156982, "objective/train/weights_min": 0.9112446308135986, "theoretical_loss": 3.3700627574785185, "tokens_seen": 2485714944 }, { "epoch": 0.5, "learning_rate": 0.0005021015195602975, "loss": 1.3207, "theoretical_loss": 3.370055662066113, "tokens_seen": 2485780480 }, { "epoch": 0.5, "learning_rate": 0.0005017782088587132, "loss": 1.3563, "theoretical_loss": 3.3699989073855114, "tokens_seen": 2486304768 }, { "epoch": 0.5, "learning_rate": 0.000501454898157129, "loss": 1.3199, "theoretical_loss": 3.36994216802173, "tokens_seen": 2486829056 }, { "epoch": 0.5, "objective/train/advantage_avg": -0.01601576618850231, "objective/train/docs_used": 1398737, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.5385818481445312, "objective/train/original_loss": 2.538581609725952, "objective/train/theoretical_loss": 3.369885443967407, "objective/train/tokens_used": 846212576, "objective/train/value_avg": -0.0188140869140625, "objective/train/value_loss": 0.007635174319148064, "objective/train/value_max": -0.00011324882507324219, "objective/train/value_min": -0.97265625, "objective/train/value_reward_corr": 0.5131129992707636, "objective/train/value_std": 0.0290374755859375, "objective/train/weight_avg": 0.9984359741210938, "objective/train/weighted_lm_loss": 2.538545608520508, "objective/train/weights_max": 1.0330055952072144, "objective/train/weights_min": 0.9116585850715637, "theoretical_loss": 3.369885443967407, "tokens_seen": 2487353344 }, { "epoch": 0.5, "learning_rate": 0.0005011315874555448, "loss": 1.3306, "theoretical_loss": 3.369885443967407, "tokens_seen": 2487353344 }, { "epoch": 0.5, "learning_rate": 0.0005008082767539606, "loss": 1.303, "theoretical_loss": 3.3698287352151852, "tokens_seen": 2487877632 }, { "epoch": 0.5, "learning_rate": 0.0005004849660523764, "loss": 1.3219, "theoretical_loss": 3.3697720417577144, "tokens_seen": 2488401920 }, { "epoch": 0.5, "learning_rate": 0.0005001616553507921, "loss": 1.307, "theoretical_loss": 3.369715363587648, "tokens_seen": 2488926208 }, { "epoch": 0.5, "objective/train/advantage_avg": 0.006201360374689102, "objective/train/docs_used": 1399945, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.3065996170043945, "objective/train/original_loss": 2.3065996170043945, "objective/train/theoretical_loss": 3.3697082798909195, "objective/train/tokens_used": 847850976, "objective/train/value_avg": -0.0223846435546875, "objective/train/value_loss": 0.004414277616888285, "objective/train/value_max": -0.0006189346313476562, "objective/train/value_min": -0.9111328125, "objective/train/value_reward_corr": 0.5159124771770389, "objective/train/value_std": 0.03875732421875, "objective/train/weight_avg": 1.0006418228149414, "objective/train/weighted_lm_loss": 2.3079748153686523, "objective/train/weights_max": 1.0376536846160889, "objective/train/weights_min": 0.9057582020759583, "theoretical_loss": 3.3697082798909195, "tokens_seen": 2488991744 }, { "epoch": 0.51, "learning_rate": 0.0004998383446492079, "loss": 1.3035, "theoretical_loss": 3.369658700697644, "tokens_seen": 2489450496 }, { "epoch": 0.51, "learning_rate": 0.0004995150339476236, "loss": 1.3274, "theoretical_loss": 3.369602053080367, "tokens_seen": 2489974784 }, { "epoch": 0.51, "learning_rate": 0.0004991917232460394, "loss": 1.3136, "theoretical_loss": 3.369545420728486, "tokens_seen": 2490499072 }, { "epoch": 0.51, "objective/train/advantage_avg": 0.009864665567874908, "objective/train/docs_used": 1400696, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 3.1195993423461914, "objective/train/original_loss": 3.1195991039276123, "objective/train/theoretical_loss": 3.3695312650248748, "objective/train/tokens_used": 849489376, "objective/train/value_avg": -0.01493072509765625, "objective/train/value_loss": 0.001787702552974224, "objective/train/value_max": -0.0008296966552734375, "objective/train/value_min": -0.98291015625, "objective/train/value_reward_corr": 0.37868558326605706, "objective/train/value_std": 0.02545166015625, "objective/train/weight_avg": 1.0009952783584595, "objective/train/weighted_lm_loss": 3.122983932495117, "objective/train/weights_max": 1.100014567375183, "objective/train/weights_min": 0.9090522527694702, "theoretical_loss": 3.3695312650248748, "tokens_seen": 2490630144 }, { "epoch": 0.51, "learning_rate": 0.0004988684125444553, "loss": 1.357, "theoretical_loss": 3.3694888036346744, "tokens_seen": 2491023360 }, { "epoch": 0.51, "learning_rate": 0.000498545101842871, "loss": 1.345, "theoretical_loss": 3.3694322017916116, "tokens_seen": 2491547648 }, { "epoch": 0.51, "learning_rate": 0.0004982217911412868, "loss": 1.3471, "theoretical_loss": 3.3693756151919816, "tokens_seen": 2492071936 }, { "epoch": 0.51, "objective/train/advantage_avg": 0.005577422212809324, "objective/train/docs_used": 1402287, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.79001522064209, "objective/train/original_loss": 2.790015697479248, "objective/train/theoretical_loss": 3.3693543991455757, "objective/train/tokens_used": 851127776, "objective/train/value_avg": -0.028839111328125, "objective/train/value_loss": 0.003583439625799656, "objective/train/value_max": -0.00064849853515625, "objective/train/value_min": -0.50390625, "objective/train/value_reward_corr": 0.5607538359680978, "objective/train/value_std": 0.043853759765625, "objective/train/weight_avg": 1.0005755424499512, "objective/train/weighted_lm_loss": 2.7922308444976807, "objective/train/weights_max": 1.0344816446304321, "objective/train/weights_min": 0.9149291515350342, "theoretical_loss": 3.3693543991455757, "tokens_seen": 2492268544 }, { "epoch": 0.51, "learning_rate": 0.0004978984804397025, "loss": 1.312, "theoretical_loss": 3.3693190438284737, "tokens_seen": 2492596224 }, { "epoch": 0.51, "learning_rate": 0.0004975751697381183, "loss": 1.3078, "theoretical_loss": 3.3692624876937822, "tokens_seen": 2493120512 }, { "epoch": 0.51, "learning_rate": 0.0004972518590365342, "loss": 1.3405, "theoretical_loss": 3.369205946780606, "tokens_seen": 2493644800 }, { "epoch": 0.51, "objective/train/advantage_avg": 0.00751195102930069, "objective/train/docs_used": 1402946, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.3508288860321045, "objective/train/original_loss": 2.3508286476135254, "objective/train/theoretical_loss": 3.3691776820298056, "objective/train/tokens_used": 852766176, "objective/train/value_avg": -0.0185394287109375, "objective/train/value_loss": 0.0027460595592856407, "objective/train/value_max": -0.000614166259765625, "objective/train/value_min": -0.99365234375, "objective/train/value_reward_corr": 0.657545522329845, "objective/train/value_std": 0.04833984375, "objective/train/weight_avg": 1.0007647275924683, "objective/train/weighted_lm_loss": 2.3532562255859375, "objective/train/weights_max": 1.0724738836288452, "objective/train/weights_min": 0.9099907875061035, "theoretical_loss": 3.3691776820298056, "tokens_seen": 2493906944 }, { "epoch": 0.51, "learning_rate": 0.0004969285483349499, "loss": 1.2669, "theoretical_loss": 3.369149421081649, "tokens_seen": 2494169088 }, { "epoch": 0.51, "learning_rate": 0.0004966052376333657, "loss": 1.2831, "theoretical_loss": 3.3690929105896212, "tokens_seen": 2494693376 }, { "epoch": 0.51, "learning_rate": 0.0004962819269317814, "loss": 1.2886, "theoretical_loss": 3.369036415297237, "tokens_seen": 2495217664 }, { "epoch": 0.51, "objective/train/advantage_avg": -0.005658740643411875, "objective/train/docs_used": 1404179, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.3830184936523438, "objective/train/original_loss": 2.383018732070923, "objective/train/theoretical_loss": 3.3690011134548303, "objective/train/tokens_used": 854404576, "objective/train/value_avg": -0.012664794921875, "objective/train/value_loss": 0.003825581166893244, "objective/train/value_max": -0.0002868175506591797, "objective/train/value_min": -0.60498046875, "objective/train/value_reward_corr": 0.3792012586452014, "objective/train/value_std": 0.014892578125, "objective/train/weight_avg": 0.9994530081748962, "objective/train/weighted_lm_loss": 2.3831043243408203, "objective/train/weights_max": 1.0172470808029175, "objective/train/weights_min": 0.9310697913169861, "theoretical_loss": 3.3690011134548303, "tokens_seen": 2495545344 }, { "epoch": 0.51, "learning_rate": 0.0004959586162301972, "loss": 1.2935, "theoretical_loss": 3.3689799351972147, "tokens_seen": 2495741952 }, { "epoch": 0.51, "learning_rate": 0.0004956353055286131, "loss": 1.3179, "theoretical_loss": 3.36892347028228, "tokens_seen": 2496266240 }, { "epoch": 0.51, "learning_rate": 0.0004953119948270287, "loss": 1.3027, "theoretical_loss": 3.368867020545161, "tokens_seen": 2496790528 }, { "epoch": 0.51, "objective/train/advantage_avg": 0.008993473835289478, "objective/train/docs_used": 1405043, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.69334077835083, "objective/train/original_loss": 2.69334077835083, "objective/train/theoretical_loss": 3.368824693198393, "objective/train/tokens_used": 856042976, "objective/train/value_avg": -0.0166778564453125, "objective/train/value_loss": 0.0004254228842910379, "objective/train/value_max": -0.0003712177276611328, "objective/train/value_min": -0.290283203125, "objective/train/value_reward_corr": 0.6293576995575334, "objective/train/value_std": 0.0213775634765625, "objective/train/weight_avg": 1.000901460647583, "objective/train/weighted_lm_loss": 2.6965861320495605, "objective/train/weights_max": 1.029173493385315, "objective/train/weights_min": 0.9856563806533813, "theoretical_loss": 3.368824693198393, "tokens_seen": 2497183744 }, { "epoch": 0.51, "learning_rate": 0.0004949886841254446, "loss": 1.3068, "theoretical_loss": 3.368810585978593, "tokens_seen": 2497314816 }, { "epoch": 0.51, "learning_rate": 0.0004946653734238603, "loss": 1.2954, "theoretical_loss": 3.368754166575315, "tokens_seen": 2497839104 }, { "epoch": 0.51, "learning_rate": 0.0004943420627222761, "loss": 1.286, "theoretical_loss": 3.368697762328071, "tokens_seen": 2498363392 }, { "epoch": 0.51, "objective/train/advantage_avg": 0.0020976460073143244, "objective/train/docs_used": 1405984, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.6760668754577637, "objective/train/original_loss": 2.6760666370391846, "objective/train/theoretical_loss": 3.368648421038717, "objective/train/tokens_used": 857681376, "objective/train/value_avg": -0.0246124267578125, "objective/train/value_loss": 0.005285078659653664, "objective/train/value_max": -0.0003936290740966797, "objective/train/value_min": -0.9921875, "objective/train/value_reward_corr": 0.4316332968516005, "objective/train/value_std": 0.043701171875, "objective/train/weight_avg": 1.0002357959747314, "objective/train/weighted_lm_loss": 2.6792397499084473, "objective/train/weights_max": 1.0943039655685425, "objective/train/weights_min": 0.9067699909210205, "theoretical_loss": 3.368648421038717, "tokens_seen": 2498822144 }, { "epoch": 0.51, "learning_rate": 0.000494018752020692, "loss": 1.2892, "theoretical_loss": 3.368641373229611, "tokens_seen": 2498887680 }, { "epoch": 0.51, "learning_rate": 0.0004936954413191076, "loss": 1.2931, "theoretical_loss": 3.3685849992726893, "tokens_seen": 2499411968 }, { "epoch": 0.51, "learning_rate": 0.0004933721306175235, "loss": 1.2747, "theoretical_loss": 3.368528640450065, "tokens_seen": 2499936256 }, { "epoch": 0.51, "objective/train/advantage_avg": 0.0057440344244241714, "objective/train/docs_used": 1406694, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.3535380363464355, "objective/train/original_loss": 2.3535380363464355, "objective/train/theoretical_loss": 3.368472296754502, "objective/train/tokens_used": 859319776, "objective/train/value_avg": -0.0299072265625, "objective/train/value_loss": 0.0036401604302227497, "objective/train/value_max": -0.0004954338073730469, "objective/train/value_min": -0.95361328125, "objective/train/value_reward_corr": 0.728678892995649, "objective/train/value_std": 0.06939697265625, "objective/train/weight_avg": 1.0005924701690674, "objective/train/weighted_lm_loss": 2.354731798171997, "objective/train/weights_max": 1.0806093215942383, "objective/train/weights_min": 0.9068262577056885, "theoretical_loss": 3.368472296754502, "tokens_seen": 2500460544 }, { "epoch": 0.51, "learning_rate": 0.0004930488199159392, "loss": 1.3012, "theoretical_loss": 3.368472296754502, "tokens_seen": 2500460544 }, { "epoch": 0.51, "learning_rate": 0.000492725509214355, "loss": 1.3079, "theoretical_loss": 3.36841596817877, "tokens_seen": 2500984832 }, { "epoch": 0.51, "learning_rate": 0.0004924021985127707, "loss": 1.2942, "theoretical_loss": 3.368359654715644, "tokens_seen": 2501509120 }, { "epoch": 0.51, "learning_rate": 0.0004920788878111865, "loss": 1.33, "theoretical_loss": 3.368303356357902, "tokens_seen": 2502033408 }, { "epoch": 0.51, "objective/train/advantage_avg": 0.005459635052829981, "objective/train/docs_used": 1408162, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.672835350036621, "objective/train/original_loss": 2.672835111618042, "objective/train/theoretical_loss": 3.368296320124922, "objective/train/tokens_used": 860958176, "objective/train/value_avg": -0.00943756103515625, "objective/train/value_loss": 0.00037306337617337704, "objective/train/value_max": -0.00028252601623535156, "objective/train/value_min": -0.51611328125, "objective/train/value_reward_corr": 0.12441999540658746, "objective/train/value_std": 0.00878143310546875, "objective/train/weight_avg": 1.0005478858947754, "objective/train/weighted_lm_loss": 2.674823760986328, "objective/train/weights_max": 1.0329195261001587, "objective/train/weights_min": 0.9780640006065369, "theoretical_loss": 3.368296320124922, "tokens_seen": 2502098944 }, { "epoch": 0.51, "learning_rate": 0.0004917555771096024, "loss": 1.3011, "theoretical_loss": 3.3682470730983285, "tokens_seen": 2502557696 }, { "epoch": 0.51, "learning_rate": 0.0004914322664080181, "loss": 1.2529, "theoretical_loss": 3.3681908049297133, "tokens_seen": 2503081984 }, { "epoch": 0.51, "learning_rate": 0.0004911089557064339, "loss": 1.304, "theoretical_loss": 3.3681345518448498, "tokens_seen": 2503606272 }, { "epoch": 0.51, "objective/train/advantage_avg": 0.007376638241112232, "objective/train/docs_used": 1408714, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.694563627243042, "objective/train/original_loss": 2.6945641040802, "objective/train/theoretical_loss": 3.368120490929626, "objective/train/tokens_used": 862596576, "objective/train/value_avg": -0.016021728515625, "objective/train/value_loss": 0.00199099350720644, "objective/train/value_max": -0.0005507469177246094, "objective/train/value_min": -0.9609375, "objective/train/value_reward_corr": 0.6986046983635202, "objective/train/value_std": 0.038177490234375, "objective/train/weight_avg": 1.0007474422454834, "objective/train/weighted_lm_loss": 2.697284698486328, "objective/train/weights_max": 1.054128885269165, "objective/train/weights_min": 0.9083310961723328, "theoretical_loss": 3.368120490929626, "tokens_seen": 2503737344 }, { "epoch": 0.51, "learning_rate": 0.0004907856450048496, "loss": 1.2976, "theoretical_loss": 3.368078313836537, "tokens_seen": 2504130560 }, { "epoch": 0.51, "learning_rate": 0.0004904623343032654, "loss": 1.3089, "theoretical_loss": 3.3680220908975795, "tokens_seen": 2504654848 }, { "epoch": 0.51, "learning_rate": 0.0004901390236016813, "loss": 1.2921, "theoretical_loss": 3.3679658830207857, "tokens_seen": 2505179136 }, { "epoch": 0.51, "objective/train/advantage_avg": 0.008690002374351025, "objective/train/docs_used": 1409820, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.571951389312744, "objective/train/original_loss": 2.5719516277313232, "objective/train/theoretical_loss": 3.3679448089487356, "objective/train/tokens_used": 864234976, "objective/train/value_avg": -0.0188751220703125, "objective/train/value_loss": 0.0013417444424703717, "objective/train/value_max": -0.0003077983856201172, "objective/train/value_min": -0.8955078125, "objective/train/value_reward_corr": 0.5848107003178482, "objective/train/value_std": 0.0284271240234375, "objective/train/weight_avg": 1.00087571144104, "objective/train/weighted_lm_loss": 2.5742692947387695, "objective/train/weights_max": 1.0432934761047363, "objective/train/weights_min": 0.9178553223609924, "theoretical_loss": 3.3679448089487356, "tokens_seen": 2505375744 }, { "epoch": 0.52, "learning_rate": 0.000489815712900097, "loss": 1.3295, "theoretical_loss": 3.36790969019897, "tokens_seen": 2505703424 }, { "epoch": 0.52, "learning_rate": 0.0004894924021985128, "loss": 1.2981, "theoretical_loss": 3.3678535124249507, "tokens_seen": 2506227712 }, { "epoch": 0.52, "learning_rate": 0.0004891690914969285, "loss": 1.3045, "theoretical_loss": 3.3677973496915516, "tokens_seen": 2506752000 }, { "epoch": 0.52, "objective/train/advantage_avg": 0.01079891063272953, "objective/train/docs_used": 1410261, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.432738780975342, "objective/train/original_loss": 2.4327385425567627, "objective/train/theoretical_loss": 3.3677692739628435, "objective/train/tokens_used": 865873376, "objective/train/value_avg": -0.015228271484375, "objective/train/value_loss": 0.0004751178785227239, "objective/train/value_max": -0.00033545494079589844, "objective/train/value_min": -0.363525390625, "objective/train/value_reward_corr": 0.47087924390091807, "objective/train/value_std": 0.0203704833984375, "objective/train/weight_avg": 1.0010823011398315, "objective/train/weighted_lm_loss": 2.435434341430664, "objective/train/weights_max": 1.0317929983139038, "objective/train/weights_min": 0.9646025896072388, "theoretical_loss": 3.3677692739628435, "tokens_seen": 2507014144 }, { "epoch": 0.52, "learning_rate": 0.0004888457807953444, "loss": 1.302, "theoretical_loss": 3.3677412019916018, "tokens_seen": 2507276288 }, { "epoch": 0.52, "learning_rate": 0.0004885224700937601, "loss": 1.3061, "theoretical_loss": 3.367685069317934, "tokens_seen": 2507800576 }, { "epoch": 0.52, "learning_rate": 0.0004881991593921759, "loss": 1.2523, "theoretical_loss": 3.3676289516633875, "tokens_seen": 2508324864 }, { "epoch": 0.52, "objective/train/advantage_avg": 0.0012257708003744483, "objective/train/docs_used": 1411438, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.779358386993408, "objective/train/original_loss": 2.779358386993408, "objective/train/theoretical_loss": 3.367593885753013, "objective/train/tokens_used": 867511776, "objective/train/value_avg": -0.02020263671875, "objective/train/value_loss": 0.004138914868235588, "objective/train/value_max": -0.0006093978881835938, "objective/train/value_min": -0.5595703125, "objective/train/value_reward_corr": 0.6196044421993645, "objective/train/value_std": 0.033416748046875, "objective/train/weight_avg": 1.000143051147461, "objective/train/weighted_lm_loss": 2.7801170349121094, "objective/train/weights_max": 1.0319361686706543, "objective/train/weights_min": 0.9223573803901672, "theoretical_loss": 3.367593885753013, "tokens_seen": 2508652544 }, { "epoch": 0.52, "learning_rate": 0.0004878758486905916, "loss": 1.3133, "theoretical_loss": 3.3675728490208052, "tokens_seen": 2508849152 }, { "epoch": 0.52, "learning_rate": 0.00048755253798900744, "loss": 1.3142, "theoretical_loss": 3.367516761383036, "tokens_seen": 2509373440 }, { "epoch": 0.52, "learning_rate": 0.00048722922728742325, "loss": 1.2837, "theoretical_loss": 3.367460688742932, "tokens_seen": 2509897728 }, { "epoch": 0.52, "objective/train/advantage_avg": 0.006968910340219736, "objective/train/docs_used": 1412115, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.644298791885376, "objective/train/original_loss": 2.644298553466797, "objective/train/theoretical_loss": 3.367418644100776, "objective/train/tokens_used": 869150176, "objective/train/value_avg": -0.01418304443359375, "objective/train/value_loss": 0.0014383437810465693, "objective/train/value_max": -0.0002551078796386719, "objective/train/value_min": -0.9384765625, "objective/train/value_reward_corr": 0.5735177494095206, "objective/train/value_std": 0.0258331298828125, "objective/train/weight_avg": 1.0007039308547974, "objective/train/weighted_lm_loss": 2.646446943283081, "objective/train/weights_max": 1.0270377397537231, "objective/train/weights_min": 0.9219714999198914, "theoretical_loss": 3.367418644100776, "tokens_seen": 2510290944 }, { "epoch": 0.52, "learning_rate": 0.000486905916585839, "loss": 1.3386, "theoretical_loss": 3.3674046310933523, "tokens_seen": 2510422016 }, { "epoch": 0.52, "learning_rate": 0.00048658260588425477, "loss": 1.3057, "theoretical_loss": 3.3673485884271597, "tokens_seen": 2510946304 }, { "epoch": 0.52, "learning_rate": 0.0004862592951826705, "loss": 1.3506, "theoretical_loss": 3.367292560737222, "tokens_seen": 2511470592 }, { "epoch": 0.52, "objective/train/advantage_avg": 0.008084574714303017, "objective/train/docs_used": 1412775, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.754493236541748, "objective/train/original_loss": 2.754492998123169, "objective/train/theoretical_loss": 3.367243548788132, "objective/train/tokens_used": 870788576, "objective/train/value_avg": -0.01629638671875, "objective/train/value_loss": 0.0018216700991615653, "objective/train/value_max": -0.0006642341613769531, "objective/train/value_min": -0.89697265625, "objective/train/value_reward_corr": 0.5121423210089158, "objective/train/value_std": 0.0269317626953125, "objective/train/weight_avg": 1.0008175373077393, "objective/train/weighted_lm_loss": 2.7573108673095703, "objective/train/weights_max": 1.0517960786819458, "objective/train/weights_min": 0.9158148169517517, "theoretical_loss": 3.367243548788132, "tokens_seen": 2511929344 }, { "epoch": 0.52, "learning_rate": 0.00048593598448108634, "loss": 1.3337, "theoretical_loss": 3.3672365480164115, "tokens_seen": 2511994880 }, { "epoch": 0.52, "learning_rate": 0.00048561267377950215, "loss": 1.2919, "theoretical_loss": 3.367180550257606, "tokens_seen": 2512519168 }, { "epoch": 0.52, "learning_rate": 0.0004852893630779179, "loss": 1.3342, "theoretical_loss": 3.3671245674536885, "tokens_seen": 2513043456 }, { "epoch": 0.52, "objective/train/advantage_avg": 0.002048224676400423, "objective/train/docs_used": 1413871, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.7483975887298584, "objective/train/original_loss": 2.7483975887298584, "objective/train/theoretical_loss": 3.3670685995975465, "objective/train/tokens_used": 872426976, "objective/train/value_avg": -0.0173492431640625, "objective/train/value_loss": 0.006170226261019707, "objective/train/value_max": -0.00033020973205566406, "objective/train/value_min": -0.9892578125, "objective/train/value_reward_corr": 0.5463276797689538, "objective/train/value_std": 0.04486083984375, "objective/train/weight_avg": 1.0002349615097046, "objective/train/weighted_lm_loss": 2.7489938735961914, "objective/train/weights_max": 1.046826958656311, "objective/train/weights_min": 0.9125953316688538, "theoretical_loss": 3.3670685995975465, "tokens_seen": 2513567744 }, { "epoch": 0.52, "learning_rate": 0.00048496605237633366, "loss": 1.3363, "theoretical_loss": 3.3670685995975465, "tokens_seen": 2513567744 }, { "epoch": 0.52, "learning_rate": 0.0004846427416747494, "loss": 1.276, "theoretical_loss": 3.367012646682071, "tokens_seen": 2514092032 }, { "epoch": 0.52, "learning_rate": 0.00048431943097316523, "loss": 1.3231, "theoretical_loss": 3.3669567087001604, "tokens_seen": 2514616320 }, { "epoch": 0.52, "learning_rate": 0.00048399612027158104, "loss": 1.2644, "theoretical_loss": 3.366900785644716, "tokens_seen": 2515140608 }, { "epoch": 0.52, "objective/train/advantage_avg": 0.00601638900116086, "objective/train/docs_used": 1414570, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.913867235183716, "objective/train/original_loss": 2.9138662815093994, "objective/train/theoretical_loss": 3.3668937963119494, "objective/train/tokens_used": 874065376, "objective/train/value_avg": -0.016998291015625, "objective/train/value_loss": 0.002518479246646166, "objective/train/value_max": -0.0002779960632324219, "objective/train/value_min": -0.76318359375, "objective/train/value_reward_corr": 0.6524305840683795, "objective/train/value_std": 0.03564453125, "objective/train/weight_avg": 1.000614047050476, "objective/train/weighted_lm_loss": 2.9157702922821045, "objective/train/weights_max": 1.0756688117980957, "objective/train/weights_min": 0.9180559515953064, "theoretical_loss": 3.3668937963119494, "tokens_seen": 2515206144 }, { "epoch": 0.52, "learning_rate": 0.00048367280956999675, "loss": 1.3321, "theoretical_loss": 3.366844877508645, "tokens_seen": 2515664896 }, { "epoch": 0.52, "learning_rate": 0.00048334949886841256, "loss": 1.3261, "theoretical_loss": 3.3667889842848586, "tokens_seen": 2516189184 }, { "epoch": 0.52, "learning_rate": 0.0004830261881668283, "loss": 1.3053, "theoretical_loss": 3.366733105966274, "tokens_seen": 2516713472 }, { "epoch": 0.52, "objective/train/advantage_avg": -0.0007845271611586213, "objective/train/docs_used": 1415934, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.2233126163482666, "objective/train/original_loss": 2.2233121395111084, "objective/train/theoretical_loss": 3.3667191387147355, "objective/train/tokens_used": 875703776, "objective/train/value_avg": -0.0311431884765625, "objective/train/value_loss": 0.005564854480326176, "objective/train/value_max": -0.0003554821014404297, "objective/train/value_min": -0.99658203125, "objective/train/value_reward_corr": 0.8255723926221739, "objective/train/value_std": 0.10369873046875, "objective/train/weight_avg": 0.9999489188194275, "objective/train/weighted_lm_loss": 2.2231943607330322, "objective/train/weights_max": 1.054586410522461, "objective/train/weights_min": 0.9225736260414124, "theoretical_loss": 3.3667191387147355, "tokens_seen": 2516844544 }, { "epoch": 0.52, "learning_rate": 0.00048270287746524413, "loss": 1.2761, "theoretical_loss": 3.3666772425458116, "tokens_seen": 2517237760 }, { "epoch": 0.52, "learning_rate": 0.0004823795667636599, "loss": 1.3075, "theoretical_loss": 3.366621394016398, "tokens_seen": 2517762048 }, { "epoch": 0.52, "learning_rate": 0.00048205625606207564, "loss": 1.325, "theoretical_loss": 3.3665655603709643, "tokens_seen": 2518286336 }, { "epoch": 0.52, "objective/train/advantage_avg": -0.008065950125455856, "objective/train/docs_used": 1416375, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.8705086708068848, "objective/train/original_loss": 2.8705081939697266, "objective/train/theoretical_loss": 3.366544626589761, "objective/train/tokens_used": 877342176, "objective/train/value_avg": -0.024505615234375, "objective/train/value_loss": 0.005569861736148596, "objective/train/value_max": -0.0006747245788574219, "objective/train/value_min": -0.779296875, "objective/train/value_reward_corr": 0.6468363260577948, "objective/train/value_std": 0.034881591796875, "objective/train/weight_avg": 0.9992209076881409, "objective/train/weighted_lm_loss": 2.867307186126709, "objective/train/weights_max": 1.0353795289993286, "objective/train/weights_min": 0.9161062240600586, "theoretical_loss": 3.366544626589761, "tokens_seen": 2518482944 }, { "epoch": 0.52, "learning_rate": 0.00048173294536049146, "loss": 1.3047, "theoretical_loss": 3.3665097416024468, "tokens_seen": 2518810624 }, { "epoch": 0.52, "learning_rate": 0.00048140963465890727, "loss": 1.2542, "theoretical_loss": 3.366453937703785, "tokens_seen": 2519334912 }, { "epoch": 0.52, "learning_rate": 0.00048108632395732297, "loss": 1.3047, "theoretical_loss": 3.3663981486679257, "tokens_seen": 2519859200 }, { "epoch": 0.52, "objective/train/advantage_avg": 0.002227901015430689, "objective/train/docs_used": 1417108, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.4191396236419678, "objective/train/original_loss": 2.4191393852233887, "objective/train/theoretical_loss": 3.3663702597213434, "objective/train/tokens_used": 878980576, "objective/train/value_avg": -0.0226287841796875, "objective/train/value_loss": 0.0033848534803837538, "objective/train/value_max": -0.0002715587615966797, "objective/train/value_min": -0.8349609375, "objective/train/value_reward_corr": 0.4376130575158179, "objective/train/value_std": 0.02764892578125, "objective/train/weight_avg": 1.0002394914627075, "objective/train/weighted_lm_loss": 2.4199371337890625, "objective/train/weights_max": 1.0454668998718262, "objective/train/weights_min": 0.9137119650840759, "theoretical_loss": 3.3663702597213434, "tokens_seen": 2520121344 }, { "epoch": 0.52, "learning_rate": 0.0004807630132557388, "loss": 1.3165, "theoretical_loss": 3.366342374487818, "tokens_seen": 2520383488 }, { "epoch": 0.52, "learning_rate": 0.00048043970255415454, "loss": 1.33, "theoretical_loss": 3.366286615156418, "tokens_seen": 2520907776 }, { "epoch": 0.52, "learning_rate": 0.00048011639185257035, "loss": 1.2714, "theoretical_loss": 3.3662308706666844, "tokens_seen": 2521432064 }, { "epoch": 0.52, "objective/train/advantage_avg": 0.009898852556943893, "objective/train/docs_used": 1417967, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.3463656902313232, "objective/train/original_loss": 2.3463659286499023, "objective/train/theoretical_loss": 3.3661960378942593, "objective/train/tokens_used": 880618976, "objective/train/value_avg": -0.0157318115234375, "objective/train/value_loss": 0.0010717485565692186, "objective/train/value_max": -0.0002982616424560547, "objective/train/value_min": -0.484619140625, "objective/train/value_reward_corr": 0.5876282654140486, "objective/train/value_std": 0.0197906494140625, "objective/train/weight_avg": 1.00099515914917, "objective/train/weighted_lm_loss": 2.3489487171173096, "objective/train/weights_max": 1.0242244005203247, "objective/train/weights_min": 0.9221334457397461, "theoretical_loss": 3.3661960378942593, "tokens_seen": 2521759744 }, { "epoch": 0.53, "learning_rate": 0.0004797930811509861, "loss": 1.2619, "theoretical_loss": 3.366175141011583, "tokens_seen": 2521956352 }, { "epoch": 0.53, "learning_rate": 0.00047946977044940187, "loss": 1.2728, "theoretical_loss": 3.366119426184083, "tokens_seen": 2522480640 }, { "epoch": 0.53, "learning_rate": 0.0004791464597478177, "loss": 1.3203, "theoretical_loss": 3.366063726177159, "tokens_seen": 2523004928 }, { "epoch": 0.53, "objective/train/advantage_avg": 0.01015600934624672, "objective/train/docs_used": 1418616, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.529352903366089, "objective/train/original_loss": 2.5293526649475098, "objective/train/theoretical_loss": 3.3660219608937445, "objective/train/tokens_used": 882257376, "objective/train/value_avg": -0.02081298828125, "objective/train/value_loss": 0.0026060654781758785, "objective/train/value_max": -0.0006589889526367188, "objective/train/value_min": -0.9951171875, "objective/train/value_reward_corr": 0.5448450818131119, "objective/train/value_std": 0.050567626953125, "objective/train/weight_avg": 1.0010285377502441, "objective/train/weighted_lm_loss": 2.53149676322937, "objective/train/weights_max": 1.089402675628662, "objective/train/weights_min": 0.9336770176887512, "theoretical_loss": 3.3660219608937445, "tokens_seen": 2523398144 }, { "epoch": 0.53, "learning_rate": 0.00047882314904623344, "loss": 1.2878, "theoretical_loss": 3.366008040983789, "tokens_seen": 2523529216 }, { "epoch": 0.53, "learning_rate": 0.0004784998383446492, "loss": 1.2816, "theoretical_loss": 3.365952370596958, "tokens_seen": 2524053504 }, { "epoch": 0.53, "learning_rate": 0.000478176527643065, "loss": 1.3033, "theoretical_loss": 3.3658967150096544, "tokens_seen": 2524577792 }, { "epoch": 0.53, "objective/train/advantage_avg": 0.008961337618529797, "objective/train/docs_used": 1419908, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.5036611557006836, "objective/train/original_loss": 2.5036613941192627, "objective/train/theoretical_loss": 3.365848028505492, "objective/train/tokens_used": 883895776, "objective/train/value_avg": -0.016815185546875, "objective/train/value_loss": 0.0013623384293168783, "objective/train/value_max": -0.00031256675720214844, "objective/train/value_min": -0.6142578125, "objective/train/value_reward_corr": 0.29208425074307237, "objective/train/value_std": 0.0196685791015625, "objective/train/weight_avg": 1.000902771949768, "objective/train/weighted_lm_loss": 2.5065276622772217, "objective/train/weights_max": 1.0250509977340698, "objective/train/weights_min": 0.9225173592567444, "theoretical_loss": 3.365848028505492, "tokens_seen": 2525036544 }, { "epoch": 0.53, "learning_rate": 0.00047785321694148076, "loss": 1.3296, "theoretical_loss": 3.3658410742148708, "tokens_seen": 2525102080 }, { "epoch": 0.53, "learning_rate": 0.0004775299062398966, "loss": 1.29, "theoretical_loss": 3.365785448205606, "tokens_seen": 2525626368 }, { "epoch": 0.53, "learning_rate": 0.0004772065955383123, "loss": 1.3378, "theoretical_loss": 3.3657298369748636, "tokens_seen": 2526150656 }, { "epoch": 0.53, "objective/train/advantage_avg": 2.5077648388105445e-05, "objective/train/docs_used": 1420528, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.243234157562256, "objective/train/original_loss": 2.243234157562256, "objective/train/theoretical_loss": 3.3656742405156503, "objective/train/tokens_used": 885534176, "objective/train/value_avg": -0.0139007568359375, "objective/train/value_loss": 0.0022917822934687138, "objective/train/value_max": -0.00036263465881347656, "objective/train/value_min": -0.7099609375, "objective/train/value_reward_corr": 0.38912804370525617, "objective/train/value_std": 0.0221099853515625, "objective/train/weight_avg": 1.000013828277588, "objective/train/weighted_lm_loss": 2.2436628341674805, "objective/train/weights_max": 1.0422171354293823, "objective/train/weights_min": 0.9250050187110901, "theoretical_loss": 3.3656742405156503, "tokens_seen": 2526674944 }, { "epoch": 0.53, "learning_rate": 0.0004768832848367281, "loss": 1.3074, "theoretical_loss": 3.3656742405156503, "tokens_seen": 2526674944 }, { "epoch": 0.53, "learning_rate": 0.0004765599741351439, "loss": 1.2788, "theoretical_loss": 3.365618658820979, "tokens_seen": 2527199232 }, { "epoch": 0.53, "learning_rate": 0.00047623666343355966, "loss": 1.2594, "theoretical_loss": 3.365563091883867, "tokens_seen": 2527723520 }, { "epoch": 0.53, "learning_rate": 0.00047591335273197547, "loss": 1.2858, "theoretical_loss": 3.365507539697336, "tokens_seen": 2528247808 }, { "epoch": 0.53, "objective/train/advantage_avg": 0.0004907023976556957, "objective/train/docs_used": 1420951, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.609449863433838, "objective/train/original_loss": 2.6094493865966797, "objective/train/theoretical_loss": 3.365500596710823, "objective/train/tokens_used": 887172576, "objective/train/value_avg": -0.020751953125, "objective/train/value_loss": 0.007044467143714428, "objective/train/value_max": -0.0002892017364501953, "objective/train/value_min": -0.99560546875, "objective/train/value_reward_corr": 0.5107248916329288, "objective/train/value_std": 0.046051025390625, "objective/train/weight_avg": 1.000083565711975, "objective/train/weighted_lm_loss": 2.609130620956421, "objective/train/weights_max": 1.0892343521118164, "objective/train/weights_min": 0.9077451825141907, "theoretical_loss": 3.365500596710823, "tokens_seen": 2528313344 }, { "epoch": 0.53, "learning_rate": 0.0004755900420303912, "loss": 1.2632, "theoretical_loss": 3.3654520022544134, "tokens_seen": 2528772096 }, { "epoch": 0.53, "learning_rate": 0.000475266731328807, "loss": 1.3044, "theoretical_loss": 3.3653964795481297, "tokens_seen": 2529296384 }, { "epoch": 0.53, "learning_rate": 0.0004749434206272228, "loss": 1.2709, "theoretical_loss": 3.365340971571522, "tokens_seen": 2529820672 }, { "epoch": 0.53, "objective/train/advantage_avg": 0.001959994900971651, "objective/train/docs_used": 1422170, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.1092700958251953, "objective/train/original_loss": 2.109269618988037, "objective/train/theoretical_loss": 3.365327096878066, "objective/train/tokens_used": 888810976, "objective/train/value_avg": -0.01275634765625, "objective/train/value_loss": 0.0020290419925004244, "objective/train/value_max": -0.0003249645233154297, "objective/train/value_min": -0.99462890625, "objective/train/value_reward_corr": 0.566920236979993, "objective/train/value_std": 0.0266876220703125, "objective/train/weight_avg": 1.0002058744430542, "objective/train/weighted_lm_loss": 2.1098742485046387, "objective/train/weights_max": 1.032643437385559, "objective/train/weights_min": 0.9098082184791565, "theoretical_loss": 3.365327096878066, "tokens_seen": 2529951744 }, { "epoch": 0.53, "learning_rate": 0.00047462010992563856, "loss": 1.2723, "theoretical_loss": 3.3652854783176305, "tokens_seen": 2530344960 }, { "epoch": 0.53, "learning_rate": 0.0004742967992240543, "loss": 1.2617, "theoretical_loss": 3.365229999779501, "tokens_seen": 2530869248 }, { "epoch": 0.53, "learning_rate": 0.00047397348852247013, "loss": 1.2343, "theoretical_loss": 3.365174535950185, "tokens_seen": 2531393536 }, { "epoch": 0.53, "objective/train/advantage_avg": -0.0006907347124069929, "objective/train/docs_used": 1422669, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.9019935131073, "objective/train/original_loss": 2.9019935131073, "objective/train/theoretical_loss": 3.365153740804889, "objective/train/tokens_used": 890449376, "objective/train/value_avg": -0.01529693603515625, "objective/train/value_loss": 0.008321361616253853, "objective/train/value_max": -0.00022876262664794922, "objective/train/value_min": -0.96142578125, "objective/train/value_reward_corr": 0.4862845694801836, "objective/train/value_std": 0.033599853515625, "objective/train/weight_avg": 0.9999715089797974, "objective/train/weighted_lm_loss": 2.901337146759033, "objective/train/weights_max": 1.081323504447937, "objective/train/weights_min": 0.9081640839576721, "theoretical_loss": 3.365153740804889, "tokens_seen": 2531590144 }, { "epoch": 0.53, "learning_rate": 0.0004736501778208859, "loss": 1.2539, "theoretical_loss": 3.3651190868227356, "tokens_seen": 2531917824 }, { "epoch": 0.53, "learning_rate": 0.0004733268671193017, "loss": 1.2898, "theoretical_loss": 3.3650636523902144, "tokens_seen": 2532442112 }, { "epoch": 0.53, "learning_rate": 0.0004730035564177174, "loss": 1.3285, "theoretical_loss": 3.365008232645685, "tokens_seen": 2532966400 }, { "epoch": 0.53, "objective/train/advantage_avg": 0.008570603094995022, "objective/train/docs_used": 1423865, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.4327809810638428, "objective/train/original_loss": 2.4327807426452637, "objective/train/theoretical_loss": 3.3649805282792515, "objective/train/tokens_used": 892087776, "objective/train/value_avg": -0.01122283935546875, "objective/train/value_loss": 0.0002672665286809206, "objective/train/value_max": -0.0001838207244873047, "objective/train/value_min": -0.2271728515625, "objective/train/value_reward_corr": 0.3969552022692324, "objective/train/value_std": 0.00966644287109375, "objective/train/weight_avg": 1.0008584260940552, "objective/train/weighted_lm_loss": 2.4350991249084473, "objective/train/weights_max": 1.0223942995071411, "objective/train/weights_min": 0.9663041830062866, "theoretical_loss": 3.3649805282792515, "tokens_seen": 2533228544 }, { "epoch": 0.53, "learning_rate": 0.0004726802457161332, "loss": 1.3024, "theoretical_loss": 3.364952827582217, "tokens_seen": 2533490688 }, { "epoch": 0.53, "learning_rate": 0.000472356935014549, "loss": 1.3294, "theoretical_loss": 3.3648974371928846, "tokens_seen": 2534014976 }, { "epoch": 0.53, "learning_rate": 0.0004720336243129648, "loss": 1.2574, "theoretical_loss": 3.3648420614707657, "tokens_seen": 2534539264 }, { "epoch": 0.53, "objective/train/advantage_avg": 0.003614211454987526, "objective/train/docs_used": 1424480, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.7963662147521973, "objective/train/original_loss": 2.7963664531707764, "objective/train/theoretical_loss": 3.3648074590895622, "objective/train/tokens_used": 893726176, "objective/train/value_avg": -0.0173187255859375, "objective/train/value_loss": 0.0034270493779331446, "objective/train/value_max": -0.0004820823669433594, "objective/train/value_min": -0.97900390625, "objective/train/value_reward_corr": 0.6835823236239945, "objective/train/value_std": 0.047637939453125, "objective/train/weight_avg": 1.0003782510757446, "objective/train/weighted_lm_loss": 2.7973670959472656, "objective/train/weights_max": 1.0520254373550415, "objective/train/weights_min": 0.9064861536026001, "theoretical_loss": 3.3648074590895622, "tokens_seen": 2534866944 }, { "epoch": 0.53, "learning_rate": 0.00047171031361138054, "loss": 1.2851, "theoretical_loss": 3.3647867004089442, "tokens_seen": 2535063552 }, { "epoch": 0.53, "learning_rate": 0.0004713870029097963, "loss": 1.3847, "theoretical_loss": 3.364731354000508, "tokens_seen": 2535587840 }, { "epoch": 0.53, "learning_rate": 0.0004710636922082121, "loss": 1.2977, "theoretical_loss": 3.36467602223855, "tokens_seen": 2536112128 }, { "epoch": 0.53, "objective/train/advantage_avg": 0.0005914611392654479, "objective/train/docs_used": 1425841, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.3934168815612793, "objective/train/original_loss": 2.3934171199798584, "objective/train/theoretical_loss": 3.36463453302468, "objective/train/tokens_used": 895364576, "objective/train/value_avg": -0.017242431640625, "objective/train/value_loss": 0.0032334569841623306, "objective/train/value_max": -0.0002846717834472656, "objective/train/value_min": -0.5732421875, "objective/train/value_reward_corr": 0.5864883772561558, "objective/train/value_std": 0.034423828125, "objective/train/weight_avg": 1.000075101852417, "objective/train/weighted_lm_loss": 2.393468141555786, "objective/train/weights_max": 1.0587843656539917, "objective/train/weights_min": 0.9404534101486206, "theoretical_loss": 3.36463453302468, "tokens_seen": 2536505344 }, { "epoch": 0.53, "learning_rate": 0.0004707403815066279, "loss": 1.3482, "theoretical_loss": 3.364620705116167, "tokens_seen": 2536636416 }, { "epoch": 0.53, "learning_rate": 0.0004704170708050436, "loss": 1.3205, "theoretical_loss": 3.364565402626462, "tokens_seen": 2537160704 }, { "epoch": 0.53, "learning_rate": 0.00047009376010345944, "loss": 1.3145, "theoretical_loss": 3.3645101147625414, "tokens_seen": 2537684992 }, { "epoch": 0.53, "objective/train/advantage_avg": 0.002803063252940774, "objective/train/docs_used": 1426240, "objective/train/instantaneous_batch_size": 16, "objective/train/instantaneous_microbatch_size": 16384, "objective/train/lm_loss": 2.4704105854034424, "objective/train/original_loss": 2.4704110622406006, "objective/train/theoretical_loss": 3.364461749873909, "objective/train/tokens_used": 897002976, "objective/train/value_avg": -0.02069091796875, "objective/train/value_loss": 0.0014521110570058227, "objective/train/value_max": -0.00035834312438964844, "objective/train/value_min": -0.98828125, "objective/train/value_reward_corr": 0.7777556883274961, "objective/train/value_std": 0.0419921875, "objective/train/weight_avg": 1.000287413597107, "objective/train/weighted_lm_loss": 2.4720351696014404, "objective/train/weights_max": 1.0524075031280518, "objective/train/weights_min": 0.9109139442443848, "theoretical_loss": 3.364461749873909, "tokens_seen": 2538143744 }, { "epoch": 0.54, "learning_rate": 0.0004697704494018752, "loss": 1.3297, "theoretical_loss": 3.3644548415175164, "tokens_seen": 2538209280 }, { "epoch": 0.54, "learning_rate": 0.000469447138700291, "loss": 1.2893, "theoretical_loss": 3.364399582884503, "tokens_seen": 2538733568 } ], "max_steps": 3125, "num_train_epochs": 9223372036854775807, "total_flos": 4.47637009037525e+17, "trial_name": null, "trial_params": null }