zaydzuhri commited on
Commit
b37a95d
·
verified ·
1 Parent(s): 2f9282b

Training in progress, step 4096

Browse files
Files changed (2) hide show
  1. model.safetensors +1 -1
  2. trainer_log.jsonl +64 -0
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0a55efecfef3c95c4a35787ed0b8132f9f0ce29e707af34d17cab60a6019637a
3
  size 49958912
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:848d3995958d72df3ea60abffde26a7f99154de9b0a700bb6892494b5bffe666
3
  size 49958912
trainer_log.jsonl CHANGED
@@ -63,3 +63,67 @@
63
  {"current_steps": 2016, "total_steps": 5000, "loss": 4.0005, "eval_loss": null, "predict_loss": null, "learning_rate": 0.00021535335064773418, "epoch": 0.11474756673686608, "percentage": 40.32}
64
  {"current_steps": 2048, "total_steps": 5000, "loss": 3.918, "eval_loss": null, "predict_loss": null, "learning_rate": 0.0002127191039101997, "epoch": 0.1165689566850703, "percentage": 40.96}
65
  {"current_steps": 2080, "total_steps": 5000, "loss": 4.0924, "eval_loss": null, "predict_loss": null, "learning_rate": 0.00021006392599655905, "epoch": 0.11839034663327452, "percentage": 41.6}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  {"current_steps": 2016, "total_steps": 5000, "loss": 4.0005, "eval_loss": null, "predict_loss": null, "learning_rate": 0.00021535335064773418, "epoch": 0.11474756673686608, "percentage": 40.32}
64
  {"current_steps": 2048, "total_steps": 5000, "loss": 3.918, "eval_loss": null, "predict_loss": null, "learning_rate": 0.0002127191039101997, "epoch": 0.1165689566850703, "percentage": 40.96}
65
  {"current_steps": 2080, "total_steps": 5000, "loss": 4.0924, "eval_loss": null, "predict_loss": null, "learning_rate": 0.00021006392599655905, "epoch": 0.11839034663327452, "percentage": 41.6}
66
+ {"current_steps": 2112, "total_steps": 5000, "loss": 3.9803, "eval_loss": null, "predict_loss": null, "learning_rate": 0.00020738898155559963, "epoch": 0.12021173658147874, "percentage": 42.24}
67
+ {"current_steps": 2144, "total_steps": 5000, "loss": 3.9268, "eval_loss": null, "predict_loss": null, "learning_rate": 0.000204695443906361, "epoch": 0.12203312652968297, "percentage": 42.88}
68
+ {"current_steps": 2176, "total_steps": 5000, "loss": 3.9334, "eval_loss": null, "predict_loss": null, "learning_rate": 0.00020198449452347837, "epoch": 0.12385451647788719, "percentage": 43.52}
69
+ {"current_steps": 2208, "total_steps": 5000, "loss": 4.0019, "eval_loss": null, "predict_loss": null, "learning_rate": 0.00019925732251894874, "epoch": 0.12567590642609142, "percentage": 44.16}
70
+ {"current_steps": 2240, "total_steps": 5000, "loss": 3.9677, "eval_loss": null, "predict_loss": null, "learning_rate": 0.00019651512412054723, "epoch": 0.12749729637429563, "percentage": 44.8}
71
+ {"current_steps": 2272, "total_steps": 5000, "loss": 3.898, "eval_loss": null, "predict_loss": null, "learning_rate": 0.00019375910214712184, "epoch": 0.12931868632249985, "percentage": 45.44}
72
+ {"current_steps": 2304, "total_steps": 5000, "loss": 3.8423, "eval_loss": null, "predict_loss": null, "learning_rate": 0.00019099046548099748, "epoch": 0.1311400762707041, "percentage": 46.08}
73
+ {"current_steps": 2336, "total_steps": 5000, "loss": 3.933, "eval_loss": null, "predict_loss": null, "learning_rate": 0.00018821042853772024, "epoch": 0.1329614662189083, "percentage": 46.72}
74
+ {"current_steps": 2368, "total_steps": 5000, "loss": 3.9181, "eval_loss": null, "predict_loss": null, "learning_rate": 0.0001854202107333746, "epoch": 0.13478285616711252, "percentage": 47.36}
75
+ {"current_steps": 2400, "total_steps": 5000, "loss": 3.7366, "eval_loss": null, "predict_loss": null, "learning_rate": 0.00018262103594970697, "epoch": 0.13660424611531674, "percentage": 48.0}
76
+ {"current_steps": 2432, "total_steps": 5000, "loss": 3.8646, "eval_loss": null, "predict_loss": null, "learning_rate": 0.0001798141319972911, "epoch": 0.13842563606352098, "percentage": 48.64}
77
+ {"current_steps": 2464, "total_steps": 5000, "loss": 3.886, "eval_loss": null, "predict_loss": null, "learning_rate": 0.00017700073007696963, "epoch": 0.1402470260117252, "percentage": 49.28}
78
+ {"current_steps": 2496, "total_steps": 5000, "loss": 3.9239, "eval_loss": null, "predict_loss": null, "learning_rate": 0.0001741820642398085, "epoch": 0.14206841595992942, "percentage": 49.92}
79
+ {"current_steps": 2528, "total_steps": 5000, "loss": 3.9335, "eval_loss": null, "predict_loss": null, "learning_rate": 0.00017135937084580174, "epoch": 0.14388980590813363, "percentage": 50.56}
80
+ {"current_steps": 2560, "total_steps": 5000, "loss": 3.7822, "eval_loss": null, "predict_loss": null, "learning_rate": 0.00016853388802156287, "epoch": 0.14571119585633788, "percentage": 51.2}
81
+ {"current_steps": 2592, "total_steps": 5000, "loss": 3.7745, "eval_loss": null, "predict_loss": null, "learning_rate": 0.0001657068551172416, "epoch": 0.1475325858045421, "percentage": 51.84}
82
+ {"current_steps": 2624, "total_steps": 5000, "loss": 3.9055, "eval_loss": null, "predict_loss": null, "learning_rate": 0.0001628795121629042, "epoch": 0.1493539757527463, "percentage": 52.48}
83
+ {"current_steps": 2656, "total_steps": 5000, "loss": 3.8666, "eval_loss": null, "predict_loss": null, "learning_rate": 0.00016005309932461487, "epoch": 0.15117536570095053, "percentage": 53.12}
84
+ {"current_steps": 2688, "total_steps": 5000, "loss": 3.8175, "eval_loss": null, "predict_loss": null, "learning_rate": 0.0001572288563604584, "epoch": 0.15299675564915477, "percentage": 53.76}
85
+ {"current_steps": 2720, "total_steps": 5000, "loss": 3.7587, "eval_loss": null, "predict_loss": null, "learning_rate": 0.00015440802207674096, "epoch": 0.15481814559735899, "percentage": 54.4}
86
+ {"current_steps": 2752, "total_steps": 5000, "loss": 3.7472, "eval_loss": null, "predict_loss": null, "learning_rate": 0.00015159183378460873, "epoch": 0.1566395355455632, "percentage": 55.04}
87
+ {"current_steps": 2784, "total_steps": 5000, "loss": 3.7517, "eval_loss": null, "predict_loss": null, "learning_rate": 0.0001487815267573226, "epoch": 0.15846092549376742, "percentage": 55.68}
88
+ {"current_steps": 2816, "total_steps": 5000, "loss": 3.7935, "eval_loss": null, "predict_loss": null, "learning_rate": 0.00014597833368842634, "epoch": 0.16028231544197166, "percentage": 56.32}
89
+ {"current_steps": 2848, "total_steps": 5000, "loss": 3.7042, "eval_loss": null, "predict_loss": null, "learning_rate": 0.00014318348415104625, "epoch": 0.16210370539017588, "percentage": 56.96}
90
+ {"current_steps": 2880, "total_steps": 5000, "loss": 3.8657, "eval_loss": null, "predict_loss": null, "learning_rate": 0.00014039820405856008, "epoch": 0.1639250953383801, "percentage": 57.6}
91
+ {"current_steps": 2912, "total_steps": 5000, "loss": 3.9127, "eval_loss": null, "predict_loss": null, "learning_rate": 0.00013762371512687085, "epoch": 0.1657464852865843, "percentage": 58.24}
92
+ {"current_steps": 2944, "total_steps": 5000, "loss": 3.7388, "eval_loss": null, "predict_loss": null, "learning_rate": 0.00013486123433852158, "epoch": 0.16756787523478855, "percentage": 58.88}
93
+ {"current_steps": 2976, "total_steps": 5000, "loss": 3.7517, "eval_loss": null, "predict_loss": null, "learning_rate": 0.00013211197340888697, "epoch": 0.16938926518299277, "percentage": 59.52}
94
+ {"current_steps": 3008, "total_steps": 5000, "loss": 3.7917, "eval_loss": null, "predict_loss": null, "learning_rate": 0.00012937713825467468, "epoch": 0.171210655131197, "percentage": 60.16}
95
+ {"current_steps": 3040, "total_steps": 5000, "loss": 3.8429, "eval_loss": null, "predict_loss": null, "learning_rate": 0.00012665792846497045, "epoch": 0.17303204507940123, "percentage": 60.8}
96
+ {"current_steps": 3072, "total_steps": 5000, "loss": 3.8099, "eval_loss": null, "predict_loss": null, "learning_rate": 0.00012395553677505878, "epoch": 0.17485343502760545, "percentage": 61.44}
97
+ {"current_steps": 3104, "total_steps": 5000, "loss": 3.6473, "eval_loss": null, "predict_loss": null, "learning_rate": 0.00012127114854324987, "epoch": 0.17667482497580966, "percentage": 62.08}
98
+ {"current_steps": 3136, "total_steps": 5000, "loss": 3.752, "eval_loss": null, "predict_loss": null, "learning_rate": 0.00011860594123094184, "epoch": 0.17849621492401388, "percentage": 62.72}
99
+ {"current_steps": 3168, "total_steps": 5000, "loss": 3.854, "eval_loss": null, "predict_loss": null, "learning_rate": 0.00011596108388614794, "epoch": 0.18031760487221812, "percentage": 63.36}
100
+ {"current_steps": 3200, "total_steps": 5000, "loss": 3.7159, "eval_loss": null, "predict_loss": null, "learning_rate": 0.00011333773663071288, "epoch": 0.18213899482042234, "percentage": 64.0}
101
+ {"current_steps": 3232, "total_steps": 5000, "loss": 3.8161, "eval_loss": null, "predict_loss": null, "learning_rate": 0.0001107370501514454, "epoch": 0.18396038476862656, "percentage": 64.64}
102
+ {"current_steps": 3264, "total_steps": 5000, "loss": 3.8101, "eval_loss": null, "predict_loss": null, "learning_rate": 0.00010816016519538871, "epoch": 0.18578177471683077, "percentage": 65.28}
103
+ {"current_steps": 3296, "total_steps": 5000, "loss": 3.6743, "eval_loss": null, "predict_loss": null, "learning_rate": 0.00010560821206945143, "epoch": 0.18760316466503502, "percentage": 65.92}
104
+ {"current_steps": 3328, "total_steps": 5000, "loss": 3.6678, "eval_loss": null, "predict_loss": null, "learning_rate": 0.00010308231014461753, "epoch": 0.18942455461323923, "percentage": 66.56}
105
+ {"current_steps": 3360, "total_steps": 5000, "loss": 3.6747, "eval_loss": null, "predict_loss": null, "learning_rate": 0.00010058356736495284, "epoch": 0.19124594456144345, "percentage": 67.2}
106
+ {"current_steps": 3392, "total_steps": 5000, "loss": 3.6702, "eval_loss": null, "predict_loss": null, "learning_rate": 9.811307976162497e-05, "epoch": 0.19306733450964766, "percentage": 67.84}
107
+ {"current_steps": 3424, "total_steps": 5000, "loss": 3.7184, "eval_loss": null, "predict_loss": null, "learning_rate": 9.567193097214706e-05, "epoch": 0.1948887244578519, "percentage": 68.48}
108
+ {"current_steps": 3456, "total_steps": 5000, "loss": 3.817, "eval_loss": null, "predict_loss": null, "learning_rate": 9.326119176505879e-05, "epoch": 0.19671011440605612, "percentage": 69.12}
109
+ {"current_steps": 3488, "total_steps": 5000, "loss": 3.6828, "eval_loss": null, "predict_loss": null, "learning_rate": 9.088191957025219e-05, "epoch": 0.19853150435426034, "percentage": 69.76}
110
+ {"current_steps": 3520, "total_steps": 5000, "loss": 3.7414, "eval_loss": null, "predict_loss": null, "learning_rate": 8.85351580151476e-05, "epoch": 0.20035289430246456, "percentage": 70.4}
111
+ {"current_steps": 3552, "total_steps": 5000, "loss": 3.7528, "eval_loss": null, "predict_loss": null, "learning_rate": 8.622193646692415e-05, "epoch": 0.2021742842506688, "percentage": 71.04}
112
+ {"current_steps": 3584, "total_steps": 5000, "loss": 3.7287, "eval_loss": null, "predict_loss": null, "learning_rate": 8.394326958100568e-05, "epoch": 0.20399567419887302, "percentage": 71.68}
113
+ {"current_steps": 3616, "total_steps": 5000, "loss": 3.6552, "eval_loss": null, "predict_loss": null, "learning_rate": 8.170015685599894e-05, "epoch": 0.20581706414707723, "percentage": 72.32}
114
+ {"current_steps": 3648, "total_steps": 5000, "loss": 3.7106, "eval_loss": null, "predict_loss": null, "learning_rate": 7.949358219528073e-05, "epoch": 0.20763845409528145, "percentage": 72.96}
115
+ {"current_steps": 3680, "total_steps": 5000, "loss": 3.6786, "eval_loss": null, "predict_loss": null, "learning_rate": 7.73245134754252e-05, "epoch": 0.2094598440434857, "percentage": 73.6}
116
+ {"current_steps": 3712, "total_steps": 5000, "loss": 3.7026, "eval_loss": null, "predict_loss": null, "learning_rate": 7.51939021216613e-05, "epoch": 0.2112812339916899, "percentage": 74.24}
117
+ {"current_steps": 3744, "total_steps": 5000, "loss": 3.826, "eval_loss": null, "predict_loss": null, "learning_rate": 7.31026826905461e-05, "epoch": 0.21310262393989413, "percentage": 74.88}
118
+ {"current_steps": 3776, "total_steps": 5000, "loss": 3.6673, "eval_loss": null, "predict_loss": null, "learning_rate": 7.105177246003757e-05, "epoch": 0.21492401388809834, "percentage": 75.52}
119
+ {"current_steps": 3808, "total_steps": 5000, "loss": 3.6773, "eval_loss": null, "predict_loss": null, "learning_rate": 6.904207102714602e-05, "epoch": 0.21674540383630259, "percentage": 76.16}
120
+ {"current_steps": 3840, "total_steps": 5000, "loss": 3.7685, "eval_loss": null, "predict_loss": null, "learning_rate": 6.707445991334119e-05, "epoch": 0.2185667937845068, "percentage": 76.8}
121
+ {"current_steps": 3872, "total_steps": 5000, "loss": 3.7505, "eval_loss": null, "predict_loss": null, "learning_rate": 6.514980217788767e-05, "epoch": 0.22038818373271102, "percentage": 77.44}
122
+ {"current_steps": 3904, "total_steps": 5000, "loss": 3.7051, "eval_loss": null, "predict_loss": null, "learning_rate": 6.32689420392789e-05, "epoch": 0.22220957368091526, "percentage": 78.08}
123
+ {"current_steps": 3936, "total_steps": 5000, "loss": 3.7203, "eval_loss": null, "predict_loss": null, "learning_rate": 6.143270450493458e-05, "epoch": 0.22403096362911948, "percentage": 78.72}
124
+ {"current_steps": 3968, "total_steps": 5000, "loss": 3.6477, "eval_loss": null, "predict_loss": null, "learning_rate": 5.964189500932548e-05, "epoch": 0.2258523535773237, "percentage": 79.36}
125
+ {"current_steps": 4000, "total_steps": 5000, "loss": 3.5716, "eval_loss": null, "predict_loss": null, "learning_rate": 5.789729906068326e-05, "epoch": 0.2276737435255279, "percentage": 80.0}
126
+ {"current_steps": 4032, "total_steps": 5000, "loss": 3.6775, "eval_loss": null, "predict_loss": null, "learning_rate": 5.6199681896450386e-05, "epoch": 0.22949513347373215, "percentage": 80.64}
127
+ {"current_steps": 4064, "total_steps": 5000, "loss": 3.6312, "eval_loss": null, "predict_loss": null, "learning_rate": 5.454978814762181e-05, "epoch": 0.23131652342193637, "percentage": 81.28}
128
+ {"current_steps": 4096, "total_steps": 5000, "loss": 3.6917, "eval_loss": null, "predict_loss": null, "learning_rate": 5.29483415121254e-05, "epoch": 0.2331379133701406, "percentage": 81.92}
129
+ {"current_steps": 4128, "total_steps": 5000, "loss": 3.6064, "eval_loss": null, "predict_loss": null, "learning_rate": 5.139604443738368e-05, "epoch": 0.2349593033183448, "percentage": 82.56}