Soptq committed on
Commit
a66e0d7
·
verified ·
1 Parent(s): 62a0677

Upload main results checkpoints and logs

Browse files
K400_VideoMamba-M_15epochs_78.9/K400_VideoMamba-M_15epochs_78.9.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2143b611b77772e69bd21a33257cfcad40722949441168544fb7ee8d9dbf8761
3
+ size 887199826
K400_VideoMamba-M_15epochs_78.9/log.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"train_lr": 5.796761823604597e-06, "train_min_lr": 4.367240898793835e-10, "train_loss": 2.415704335212009, "train_loss_scale": 65536.0, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 3.843648377317971, "val_loss": 1.0091458527812203, "val_acc1": 76.80412610102017, "val_acc5": 93.44552460549599, "epoch": 0, "n_parameters": 73875856}
2
+ {"train_lr": 1.3398380911802303e-05, "train_min_lr": 1.0094248974896778e-09, "train_loss": 2.328023909435422, "train_loss_scale": 65536.0, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 3.640195181321539, "val_loss": 1.0080116096128156, "val_acc1": 77.06691176563122, "val_acc5": 93.3646675489251, "epoch": 1, "n_parameters": 73875856}
3
+ {"train_lr": 2.100000000000001e-05, "train_min_lr": 1.5821257050999747e-09, "train_loss": 2.2960658789998174, "train_loss_scale": 122574.65388711395, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 8.770347180838783, "val_loss": 1.0052299177553055, "val_acc1": 77.11239375802081, "val_acc5": 93.40509608762058, "epoch": 2, "n_parameters": 73875856}
4
+ {"train_lr": 2.8601619088197695e-05, "train_min_lr": 2.1548265127102664e-09, "train_loss": 2.2946665629712792, "train_loss_scale": 131072.0, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 5.6398136407549515, "val_loss": 1.0040937593917627, "val_acc1": 77.18819741641774, "val_acc5": 93.40004247430602, "epoch": 3, "n_parameters": 73875856}
5
+ {"train_lr": 3.620323817639537e-05, "train_min_lr": 2.727527320320565e-09, "train_loss": 2.282526657778742, "train_loss_scale": 228119.71884984025, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 4.900674860317486, "val_loss": 1.0059405708964464, "val_acc1": 77.41055434107274, "val_acc5": 93.25854268950214, "epoch": 4, "n_parameters": 73875856}
6
+ {"train_lr": 3.9689496212621115e-05, "train_min_lr": 2.990179627640739e-09, "train_loss": 2.2663153154543414, "train_loss_scale": 262144.0, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 4.025515019322356, "val_loss": 1.0092057626928397, "val_acc1": 76.9911081604411, "val_acc5": 93.35961398419067, "epoch": 5, "n_parameters": 73875856}
7
+ {"train_lr": 3.7861028442683466e-05, "train_min_lr": 2.852424110509085e-09, "train_loss": 2.2512129874417957, "train_loss_scale": 422180.2598509052, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 3.543935421683664, "val_loss": 1.0221622114830116, "val_acc1": 76.63735841157727, "val_acc5": 93.29391761488304, "epoch": 6, "n_parameters": 73875856}
8
+ {"train_lr": 3.438208573108315e-05, "train_min_lr": 2.5903229347665623e-09, "train_loss": 2.2379470121301908, "train_loss_scale": 524288.0, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 3.9288717998356355, "val_loss": 1.0152245172825844, "val_acc1": 77.15282242626331, "val_acc5": 93.3343462160387, "epoch": 7, "n_parameters": 73875856}
9
+ {"train_lr": 2.959321122965063e-05, "train_min_lr": 2.2295323896611103e-09, "train_loss": 2.2310928618406463, "train_loss_scale": 776242.1640042599, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 3.3360871454905143, "val_loss": 1.043170733296353, "val_acc1": 76.85971538249908, "val_acc5": 93.13220348786122, "epoch": 8, "n_parameters": 73875856}
10
+ {"train_lr": 2.396317334063676e-05, "train_min_lr": 1.8053691675232279e-09, "train_loss": 2.21587963455433, "train_loss_scale": 1048576.0, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 4.600536706602485, "val_loss": 1.0428140302805078, "val_acc1": 76.76875116869925, "val_acc5": 93.0816676924632, "epoch": 9, "n_parameters": 73875856}
11
+ {"train_lr": 1.804307939939855e-05, "train_min_lr": 1.3593533198546765e-09, "train_loss": 2.20849494574169, "train_loss_scale": 1416247.6166134186, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 3.5034991707156897, "val_loss": 1.0484550995977173, "val_acc1": 76.65251904563372, "val_acc5": 92.98564999952252, "epoch": 10, "n_parameters": 73875856}
12
+ {"train_lr": 1.2412429448624539e-05, "train_min_lr": 9.351439854004237e-10, "train_loss": 2.193803250567474, "train_loss_scale": 2097152.0, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 4.002372767978583, "val_loss": 1.0555723252467775, "val_acc1": 76.64241185139134, "val_acc5": 92.87447151984497, "epoch": 11, "n_parameters": 73875856}
13
+ {"train_lr": 7.6223907365415415e-06, "train_min_lr": 5.742657294570596e-10, "train_loss": 2.1940112734318604, "train_loss_scale": 2560021.810436635, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 3.599177379196825, "val_loss": 1.0612611761451631, "val_acc1": 76.5716619821228, "val_acc5": 92.78350729910514, "epoch": 12, "n_parameters": 73875856}
14
+ {"train_lr": 4.141845626450192e-06, "train_min_lr": 3.120438301029906e-10, "train_loss": 2.187208260444248, "train_loss_scale": 4194304.0, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 3.211724188508063, "val_loss": 1.0646917059441696, "val_acc1": 76.4251085863177, "val_acc5": 92.72286455005208, "epoch": 13, "n_parameters": 73875856}
15
+ {"train_lr": 2.311494124110158e-06, "train_min_lr": 1.7414639385439048e-10, "train_loss": 2.181950545035644, "train_loss_scale": 4575096.775292865, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 3.412514848323168, "val_loss": 1.0653279004303586, "val_acc1": 76.61714406935933, "val_acc5": 92.7835073268652, "epoch": 14, "n_parameters": 73875856}
16
+ {"Final top-1": 78.8699651286198, "Final Top-5": 93.93541213928337}
K400_VideoMamba-S_15epochs_78.5/K400_VideoMamba-S_15epochs_78.5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc75b7c246c27710ffd9194f269d63ba0dccf8584b8e9632bb4b24c3a147fa91
3
+ size 307340266
K400_VideoMamba-S_15epochs_78.5/log.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"train_lr": 5.796761823604603e-06, "train_min_lr": 4.3623151888942615e-09, "train_loss": 2.5616472823202168, "train_loss_scale": 65536.0, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 6.158362819490849, "val_loss": 1.0178429594335074, "val_acc1": 76.4352158175735, "val_acc5": 92.77340080117803, "epoch": 0, "n_parameters": 25568656}
2
+ {"train_lr": 1.3398380911802301e-05, "train_min_lr": 1.0082863905179636e-08, "train_loss": 2.4396387971818636, "train_loss_scale": 65536.0, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 3.166282560878668, "val_loss": 1.0207667527362132, "val_acc1": 76.48575156670472, "val_acc5": 92.79866837500947, "epoch": 1, "n_parameters": 25568656}
3
+ {"train_lr": 2.1e-05, "train_min_lr": 1.580341262146501e-08, "train_loss": 2.42595182079921, "train_loss_scale": 122592.10223642172, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 3.101046022896569, "val_loss": 1.0227424043517273, "val_acc1": 76.46048337289822, "val_acc5": 92.60157939663361, "epoch": 2, "n_parameters": 25568656}
4
+ {"train_lr": 2.8601619088197708e-05, "train_min_lr": 2.1523961337750373e-08, "train_loss": 2.413048079298835, "train_loss_scale": 131072.0, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 3.134975929102832, "val_loss": 1.027105179328758, "val_acc1": 76.29876927363648, "val_acc5": 92.72286506130017, "epoch": 3, "n_parameters": 25568656}
5
+ {"train_lr": 3.620323817639542e-05, "train_min_lr": 2.7244510054035785e-08, "train_loss": 2.4071600150725585, "train_loss_scale": 228154.6155484558, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 3.2080282063022083, "val_loss": 1.0328979573499124, "val_acc1": 76.45037646550999, "val_acc5": 92.4297581031295, "epoch": 4, "n_parameters": 25568656}
6
+ {"train_lr": 3.9689496212620803e-05, "train_min_lr": 2.986807073267247e-08, "train_loss": 2.3984077271466817, "train_loss_scale": 262144.0, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 3.2822956221299178, "val_loss": 1.0286080212547228, "val_acc1": 76.45543004875114, "val_acc5": 92.57125782315987, "epoch": 5, "n_parameters": 25568656}
7
+ {"train_lr": 3.7861028442683344e-05, "train_min_lr": 2.849206927394045e-08, "train_loss": 2.402562915469511, "train_loss_scale": 422250.0532481363, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 3.282633871823962, "val_loss": 1.0276872121299117, "val_acc1": 76.44532282443535, "val_acc5": 92.56620443423927, "epoch": 6, "n_parameters": 25568656}
8
+ {"train_lr": 3.438208573108315e-05, "train_min_lr": 2.587401369499487e-08, "train_loss": 2.379816637331812, "train_loss_scale": 524288.0, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 3.3572426992483413, "val_loss": 1.0279879876627371, "val_acc1": 76.73337624794502, "val_acc5": 92.55609713820995, "epoch": 7, "n_parameters": 25568656}
9
+ {"train_lr": 2.9593211229650625e-05, "train_min_lr": 2.2270177516968593e-08, "train_loss": 2.3812856625317256, "train_loss_scale": 776381.750798722, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 3.3208057187235775, "val_loss": 1.0402673817454622, "val_acc1": 76.57671547051702, "val_acc5": 92.36406158576813, "epoch": 8, "n_parameters": 25568656}
10
+ {"train_lr": 2.39631733406367e-05, "train_min_lr": 1.8033329334370106e-08, "train_loss": 2.3630672339687204, "train_loss_scale": 1048576.0, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 3.2645639426157245, "val_loss": 1.0322413163689466, "val_acc1": 76.65757270984176, "val_acc5": 92.64200787749559, "epoch": 9, "n_parameters": 25568656}
11
+ {"train_lr": 1.8043079399398582e-05, "train_min_lr": 1.3578201367169086e-08, "train_loss": 2.3535241872167436, "train_loss_scale": 1416526.790202343, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 3.171975224162824, "val_loss": 1.0325334313182304, "val_acc1": 76.61714418502632, "val_acc5": 92.48029365794018, "epoch": 10, "n_parameters": 25568656}
12
+ {"train_lr": 1.2412429448624583e-05, "train_min_lr": 9.340892581497073e-09, "train_loss": 2.35831763307317, "train_loss_scale": 2097152.0, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 3.2983405097477987, "val_loss": 1.036438956283606, "val_acc1": 76.52112628851174, "val_acc5": 92.53588300881927, "epoch": 11, "n_parameters": 25568656}
13
+ {"train_lr": 7.622390736541521e-06, "train_min_lr": 5.736180284361833e-09, "train_loss": 2.3493361811890514, "train_loss_scale": 2560580.1576144835, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 3.2631644649119678, "val_loss": 1.0344269850219672, "val_acc1": 76.8142332466824, "val_acc5": 92.65716841439178, "epoch": 12, "n_parameters": 25568656}
14
+ {"train_lr": 4.141845626450226e-06, "train_min_lr": 3.116918830914421e-09, "train_loss": 2.344041983810223, "train_loss_scale": 4194304.0, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 3.215242317563302, "val_loss": 1.035390361928596, "val_acc1": 76.65757257335471, "val_acc5": 92.50050789837117, "epoch": 13, "n_parameters": 25568656}
15
+ {"train_lr": 2.311494124110156e-06, "train_min_lr": 1.7394997816859342e-09, "train_loss": 2.343366749719563, "train_loss_scale": 4576213.469648562, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 3.2366997791936223, "val_loss": 1.0376251934526057, "val_acc1": 76.48575133537075, "val_acc5": 92.62179358154445, "epoch": 14, "n_parameters": 25568656}
16
+ {"Final top-1": 78.49598221054227, "Final Top-5": 93.64734421589932}
K400_VideoMamba-Ti_30epochs_76.5/K400_VideoMamba-Ti_30epochs_76.5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da282442c55de30dcca0ff39fdd7e13175e4e885302a6c534ee81957fba1bae6
3
+ size 84920938
K400_VideoMamba-Ti_30epochs_76.5/log.txt ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"train_lr": 5.796761823604603e-06, "train_min_lr": 4.3623151888942615e-09, "train_loss": 2.766492031601925, "train_loss_scale": 65536.0, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 5.052987583909934, "val_loss": 1.1287840834030738, "val_acc1": 73.91348525464282, "val_acc5": 91.30281241783017, "epoch": 0, "n_parameters": 7033744}
2
+ {"train_lr": 1.3398380911802301e-05, "train_min_lr": 1.0082863905179636e-08, "train_loss": 2.7523064762353897, "train_loss_scale": 65536.0, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 5.1467811728691775, "val_loss": 1.1280001293366344, "val_acc1": 74.12068174187526, "val_acc5": 91.42409786041611, "epoch": 1, "n_parameters": 7033744}
3
+ {"train_lr": 2.1e-05, "train_min_lr": 1.580341262146501e-08, "train_loss": 2.7505678130002322, "train_loss_scale": 122592.10223642172, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 5.157926769937062, "val_loss": 1.128691458537315, "val_acc1": 73.9235924026184, "val_acc5": 91.23711595598895, "epoch": 2, "n_parameters": 7033744}
4
+ {"train_lr": 2.8601619088197708e-05, "train_min_lr": 2.1523961337750373e-08, "train_loss": 2.75004669057485, "train_loss_scale": 131072.0, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 5.187909469563867, "val_loss": 1.1297500137812817, "val_acc1": 73.67091411963024, "val_acc5": 91.4139907679607, "epoch": 3, "n_parameters": 7033744}
5
+ {"train_lr": 3.620323817639542e-05, "train_min_lr": 2.7244510054035785e-08, "train_loss": 2.748580263731198, "train_loss_scale": 228154.6155484558, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 5.194184198674049, "val_loss": 1.1309940941775074, "val_acc1": 73.72650322992199, "val_acc5": 91.28765156631978, "epoch": 4, "n_parameters": 7033744}
6
+ {"train_lr": 3.995011322111233e-05, "train_min_lr": 3.00641963574992e-08, "train_loss": 2.7458091202222756, "train_loss_scale": 262144.0, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 5.20612795030474, "val_loss": 1.1234162330197601, "val_acc1": 73.85284240380282, "val_acc5": 91.4695800448129, "epoch": 5, "n_parameters": 7033744}
7
+ {"train_lr": 3.965141816880098e-05, "train_min_lr": 2.9839415349896006e-08, "train_loss": 2.756216258175584, "train_loss_scale": 422250.0532481363, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 5.234621368404141, "val_loss": 1.127847880536977, "val_acc1": 73.95896738351945, "val_acc5": 91.22195525253231, "epoch": 6, "n_parameters": 7033744}
8
+ {"train_lr": 3.9058579110150524e-05, "train_min_lr": 2.939327819456398e-08, "train_loss": 2.736829403744211, "train_loss_scale": 524288.0, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 5.190921060574321, "val_loss": 1.1265247498209088, "val_acc1": 74.06509235398273, "val_acc5": 91.27249085129645, "epoch": 7, "n_parameters": 7033744}
9
+ {"train_lr": 3.818094547126076e-05, "train_min_lr": 2.873282074095189e-08, "train_loss": 2.7454542378806712, "train_loss_scale": 776381.750798722, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 5.257847345929049, "val_loss": 1.1216933087875636, "val_acc1": 73.93369955059396, "val_acc5": 91.38872297204864, "epoch": 8, "n_parameters": 7033744}
10
+ {"train_lr": 3.703235805888975e-05, "train_min_lr": 2.7868458797641472e-08, "train_loss": 2.735197303981319, "train_loss_scale": 1048576.0, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 5.203770769543795, "val_loss": 1.1213409971230879, "val_acc1": 73.90843163438824, "val_acc5": 91.44936554528786, "epoch": 9, "n_parameters": 7033744}
11
+ {"train_lr": 3.56309307826638e-05, "train_min_lr": 2.6813823868823523e-08, "train_loss": 2.7304389027678293, "train_loss_scale": 1416526.790202343, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 5.2953459284683895, "val_loss": 1.1244759876639223, "val_acc1": 74.08530656665364, "val_acc5": 91.3634551946433, "epoch": 10, "n_parameters": 7033744}
12
+ {"train_lr": 3.3998764987900644e-05, "train_min_lr": 2.5585548177333914e-08, "train_loss": 2.7344873623567394, "train_loss_scale": 2097152.0, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 5.310826039542786, "val_loss": 1.1359453071266985, "val_acc1": 73.66586059190925, "val_acc5": 91.26743730738208, "epoch": 11, "n_parameters": 7033744}
13
+ {"train_lr": 3.216160090419244e-05, "train_min_lr": 2.4203002364563575e-08, "train_loss": 2.727030398762518, "train_loss_scale": 2560580.1576144835, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 5.270268705325385, "val_loss": 1.1166783234534354, "val_acc1": 73.98423507764457, "val_acc5": 91.4493657858752, "epoch": 12, "n_parameters": 7033744}
14
+ {"train_lr": 3.014841170660801e-05, "train_min_lr": 2.268799000387303e-08, "train_loss": 2.7199884558193728, "train_loss_scale": 4194304.0, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 5.3104823669051715, "val_loss": 1.1240628100573444, "val_acc1": 74.00444954940953, "val_acc5": 91.25227649288513, "epoch": 13, "n_parameters": 7033744}
15
+ {"train_lr": 2.7990946591414164e-05, "train_min_lr": 2.106440374521459e-08, "train_loss": 2.7214455872742387, "train_loss_scale": 4576213.469648562, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 5.362951320327255, "val_loss": 1.1239129707503777, "val_acc1": 73.7568247201155, "val_acc5": 91.36345506509628, "epoch": 14, "n_parameters": 7033744}
16
+ {"train_lr": 2.5723230072284647e-05, "train_min_lr": 1.935784851377099e-08, "train_loss": 2.7111076358566906, "train_loss_scale": 8388608.0, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 5.4035205109812585, "val_loss": 1.1190634060364504, "val_acc1": 73.90337814368068, "val_acc5": 91.47463367432084, "epoch": 15, "n_parameters": 7033744}
17
+ {"train_lr": 2.3381025393394265e-05, "train_min_lr": 1.759523770498859e-08, "train_loss": 2.722221492467343, "train_loss_scale": 8388608.0, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 5.409071534332503, "val_loss": 1.1222793073393404, "val_acc1": 74.19143149085014, "val_acc5": 91.37861573153947, "epoch": 16, "n_parameters": 7033744}
18
+ {"train_lr": 2.1001270521694432e-05, "train_min_lr": 1.5804368744254613e-08, "train_loss": 2.7124379699666914, "train_loss_scale": 16451141.248136315, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 5.405713900971336, "val_loss": 1.1212108970906298, "val_acc1": 74.13584236205166, "val_acc5": 91.42915141589718, "epoch": 17, "n_parameters": 7033744}
19
+ {"train_lr": 1.8621495613108523e-05, "train_min_lr": 1.4013484704893083e-08, "train_loss": 2.705171400111831, "train_loss_scale": 16777216.0, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 5.393888255658622, "val_loss": 1.1152486553630576, "val_acc1": 74.26218143414555, "val_acc5": 91.31797278816589, "epoch": 18, "n_parameters": 7033744}
20
+ {"train_lr": 1.6279231139553547e-05, "train_min_lr": 1.2250828898026982e-08, "train_loss": 2.695304168806797, "train_loss_scale": 30722495.113951012, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 5.422047192042374, "val_loss": 1.1174136872331684, "val_acc1": 74.01455663261159, "val_acc5": 91.36345497256268, "epoch": 19, "n_parameters": 7033744}
21
+ {"train_lr": 1.4011416010978406e-05, "train_min_lr": 1.0544199458690121e-08, "train_loss": 2.6973604507212694, "train_loss_scale": 33554432.0, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 5.448751109349715, "val_loss": 1.1172529654935575, "val_acc1": 74.17627082440693, "val_acc5": 91.40893721247963, "epoch": 20, "n_parameters": 7033744}
22
+ {"train_lr": 1.1853815026687832e-05, "train_min_lr": 8.920510952631953e-09, "train_loss": 2.702480722444888, "train_loss_scale": 57085415.46325879, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 5.408106899870851, "val_loss": 1.115336077968375, "val_acc1": 74.28239581337692, "val_acc5": 91.4089371939729, "epoch": 21, "n_parameters": 7033744}
23
+ {"train_lr": 9.84045484309253e-06, "train_min_lr": 7.405369917537388e-09, "train_loss": 2.6873067954001715, "train_loss_scale": 67108864.0, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 5.480403551278404, "val_loss": 1.1171580284045866, "val_acc1": 74.18637800014258, "val_acc5": 91.36345515762986, "epoch": 22, "n_parameters": 7033744}
24
+ {"train_lr": 8.0030873530108e-06, "train_min_lr": 6.02267103263132e-09, "train_loss": 2.695888483359283, "train_loss_scale": 105451681.3972311, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 5.493371952329073, "val_loss": 1.1151480245977068, "val_acc1": 74.3733599138231, "val_acc5": 91.31291956580574, "epoch": 23, "n_parameters": 7033744}
25
+ {"train_lr": 6.370688939351329e-06, "train_min_lr": 4.79422028531298e-09, "train_loss": 2.6928711958840506, "train_loss_scale": 134217728.0, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 5.4894860900375155, "val_loss": 1.1161840702407062, "val_acc1": 74.47948480100614, "val_acc5": 91.31291934372513, "epoch": 24, "n_parameters": 7033744}
26
+ {"train_lr": 4.9690035002451514e-06, "train_min_lr": 3.739391077708465e-09, "train_loss": 2.6981368632362295, "train_loss_scale": 193465063.73588926, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 5.503132145244854, "val_loss": 1.115710699572586, "val_acc1": 74.35819926588661, "val_acc5": 91.31797288069947, "epoch": 25, "n_parameters": 7033744}
27
+ {"train_lr": 3.820136452393189e-06, "train_min_lr": 2.8748186965461473e-09, "train_loss": 2.6885893122710613, "train_loss_scale": 268435456.0, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 5.526718641876286, "val_loss": 1.1131801881397574, "val_acc1": 74.17627093544723, "val_acc5": 91.44431215636726, "epoch": 26, "n_parameters": 7033744}
28
+ {"train_lr": 2.942206115440049e-06, "train_min_lr": 2.2141379647474633e-09, "train_loss": 2.693267487283078, "train_loss_scale": 352053529.3546326, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 5.516044377518919, "val_loss": 1.113148590979668, "val_acc1": 74.37841343229073, "val_acc5": 91.37356232411216, "epoch": 27, "n_parameters": 7033744}
29
+ {"train_lr": 2.349057975249593e-06, "train_min_lr": 1.7677682121243997e-09, "train_loss": 2.682887580003728, "train_loss_scale": 536870912.0, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 5.491146889484474, "val_loss": 1.1139164198046694, "val_acc1": 74.3531456548854, "val_acc5": 91.39883012002421, "epoch": 28, "n_parameters": 7033744}
30
+ {"train_lr": 2.0500463323221688e-06, "train_min_lr": 1.5427489563242004e-09, "train_loss": 2.682087733627508, "train_loss_scale": 634353862.4749733, "train_weight_decay": 0.0999999999999966, "train_grad_norm": 5.537904883091188, "val_loss": 1.11356991998708, "val_acc1": 74.28239587815044, "val_acc5": 91.40893726799978, "epoch": 29, "n_parameters": 7033744}
31
+ {"Final top-1": 76.5350988022439, "Final Top-5": 92.86905544044069}
VideoMamba-B_15epochs_81.9/VideoMamba-B_15epochs_81.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c5b7497d55bf413c3d68f3aa33106a928150a5ce8402044ef1a08ed73ad817a
3
+ size 1171687758
VideoMamba-B_15epochs_81.9/logs/log.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"train_lr": 1.999999999999932e-05, "train_loss_cls": 2.6161203591419544, "train_grad_norm": 1.4870054344002672, "test_flops": 13.207011839999973, "test_layer_flops": 13.109437439999983, "test_loss": 0.8365651273681801, "test_acc1": 81.67400000793457, "test_acc3": 93.50200001647949, "test_acc5": 95.73800001953126, "epoch": 0, "n_parameters": 97598440}
2
+ {"train_lr": 1.999999999999932e-05, "train_loss_cls": 2.59058563804312, "train_grad_norm": 1.4039455189121712, "test_flops": 13.207011839999973, "test_layer_flops": 13.109437439999983, "test_loss": 0.8408564518072775, "test_acc1": 81.62199999511719, "test_acc3": 93.37600001159667, "test_acc5": 95.68000001464844, "epoch": 1, "n_parameters": 97598440}
3
+ {"train_lr": 1.9792402206971924e-05, "train_loss_cls": 2.5997403140762727, "train_grad_norm": 1.4010080221793253, "test_flops": 13.207011839999973, "test_layer_flops": 13.109437439999983, "test_loss": 0.8421620548303638, "test_acc1": 81.74400000976563, "test_acc3": 93.44200001037598, "test_acc5": 95.64600001464844, "epoch": 2, "n_parameters": 97598440}
4
+ {"train_lr": 1.917868184760557e-05, "train_loss_cls": 2.591801293557592, "train_grad_norm": 1.4330106148426291, "test_flops": 13.207011839999973, "test_layer_flops": 13.109437439999983, "test_loss": 0.8298801461196675, "test_acc1": 81.73600000854492, "test_acc3": 93.48400002319336, "test_acc5": 95.80800001342773, "epoch": 3, "n_parameters": 97598440}
5
+ {"train_lr": 1.818566144656129e-05, "train_loss_cls": 2.592402919948244, "train_grad_norm": 1.397681054761179, "test_flops": 13.207011839999973, "test_layer_flops": 13.109437439999983, "test_loss": 0.83646357230538, "test_acc1": 81.55600000732422, "test_acc3": 93.43800001159669, "test_acc5": 95.70800001464843, "epoch": 4, "n_parameters": 97598440}
6
+ {"train_lr": 1.6856740760408965e-05, "train_loss_cls": 2.5893381974465557, "train_grad_norm": 1.5234446458155206, "test_flops": 13.207011839999973, "test_layer_flops": 13.109437439999983, "test_loss": 0.8370186418142854, "test_acc1": 81.6160000189209, "test_acc3": 93.37600001037598, "test_acc5": 95.74000001342773, "epoch": 5, "n_parameters": 97598440}
7
+ {"train_lr": 1.5250000000000704e-05, "train_loss_cls": 2.5849340948031294, "train_grad_norm": 1.8965270598824742, "test_flops": 13.207011839999973, "test_layer_flops": 13.109437439999983, "test_loss": 0.8440379478341463, "test_acc1": 81.54400000183105, "test_acc3": 93.30800000671387, "test_acc5": 95.66200001220703, "epoch": 6, "n_parameters": 97598440}
8
+ {"train_lr": 1.3435661446562572e-05, "train_loss_cls": 2.582312163165052, "train_grad_norm": 1.5318159784296814, "test_flops": 13.207011839999973, "test_layer_flops": 13.109437439999983, "test_loss": 0.8325873408846709, "test_acc1": 81.76600000061035, "test_acc3": 93.54800000915527, "test_acc5": 95.74200001708985, "epoch": 7, "n_parameters": 97598440}
9
+ {"train_lr": 1.1493020401043087e-05, "train_loss_cls": 2.5727151509145085, "train_grad_norm": 1.4192168530609777, "test_flops": 13.207011839999973, "test_layer_flops": 13.109437439999983, "test_loss": 0.8337490833839591, "test_acc1": 81.75400000976562, "test_acc3": 93.43400001159668, "test_acc5": 95.75600001464844, "epoch": 8, "n_parameters": 97598440}
10
+ {"train_lr": 9.506979598956982e-06, "train_loss_cls": 2.563212732366807, "train_grad_norm": 1.5865546749126045, "test_flops": 13.207011839999973, "test_layer_flops": 13.109437439999983, "test_loss": 0.8364757006341705, "test_acc1": 81.7100000024414, "test_acc3": 93.42000000793458, "test_acc5": 95.71200001464844, "epoch": 9, "n_parameters": 97598440}
11
+ {"train_lr": 7.564338553438001e-06, "train_loss_cls": 2.5614819990621385, "train_grad_norm": 1.5757286844493674, "test_flops": 13.207011839999973, "test_layer_flops": 13.109437439999983, "test_loss": 0.8336224710302693, "test_acc1": 81.78800000854493, "test_acc3": 93.40200001159668, "test_acc5": 95.72400001586914, "epoch": 10, "n_parameters": 97598440}
12
+ {"train_lr": 5.750000000000171e-06, "train_loss_cls": 2.5521171146111903, "train_grad_norm": 1.4413806494953725, "test_flops": 13.207011839999973, "test_layer_flops": 13.109437439999983, "test_loss": 0.8346988151648215, "test_acc1": 81.87399999938965, "test_acc3": 93.38400000915527, "test_acc5": 95.73600001342774, "epoch": 11, "n_parameters": 97598440}
13
+ {"train_lr": 4.143259239590925e-06, "train_loss_cls": 2.558516750304247, "train_grad_norm": 1.3406096884577299, "test_flops": 13.207011839999973, "test_layer_flops": 13.109437439999983, "test_loss": 0.8352586951258839, "test_acc1": 81.78199999816894, "test_acc3": 93.48600001037597, "test_acc5": 95.74800001220703, "epoch": 12, "n_parameters": 97598440}
14
+ {"train_lr": 2.8143385534381205e-06, "train_loss_cls": 2.549954787087288, "train_grad_norm": 1.4732744793335406, "test_flops": 13.207011839999973, "test_layer_flops": 13.109437439999983, "test_loss": 0.8289175109595669, "test_acc1": 81.88200000061035, "test_acc3": 93.46200000793458, "test_acc5": 95.75800001342773, "epoch": 13, "n_parameters": 97598440}
15
+ {"train_lr": 1.8213181523952925e-06, "train_loss_cls": 2.54208091890021, "train_grad_norm": 1.5292939539674184, "test_flops": 13.207011839999973, "test_layer_flops": 13.109437439999983, "test_loss": 0.832456745511415, "test_acc1": 81.84199999816894, "test_acc3": 93.40400000915527, "test_acc5": 95.74400001342774, "epoch": 14, "n_parameters": 97598440}
VideoMamba-B_15epochs_81.9/logs/log_rank0.txt ADDED
The diff for this file is too large to render. See raw diff
 
VideoMamba-S_15epochs_80.1/VideoMamba-S_15epochs_80.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76e7188ca7d56f5e35a5e46cee581ecfb68d5cd405083fa9317c5160d0170198
3
+ size 310064910
VideoMamba-S_15epochs_80.1/logs/log.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"train_lr": 1.999999999999932e-05, "train_loss_cls": 3.1011218713175097, "train_grad_norm": 1.9570119299000497, "test_flops": 3.5843811839999957, "test_layer_flops": 3.5355939840000015, "test_loss": 0.9045450846303482, "test_acc1": 79.59400001220703, "test_acc3": 92.1960000189209, "test_acc5": 94.80800002075195, "epoch": 0, "n_parameters": 25796584}
2
+ {"train_lr": 1.999999999999932e-05, "train_loss_cls": 3.0705538070578275, "train_grad_norm": 1.8329485441855105, "test_flops": 3.5843811839999957, "test_layer_flops": 3.5355939840000015, "test_loss": 0.9080041601341597, "test_acc1": 79.62000000183106, "test_acc3": 92.17600001281738, "test_acc5": 94.73000001708985, "epoch": 1, "n_parameters": 25796584}
3
+ {"train_lr": 1.9792402206971924e-05, "train_loss_cls": 3.070421175907651, "train_grad_norm": 1.812621396651371, "test_flops": 3.5843811839999957, "test_layer_flops": 3.5355939840000015, "test_loss": 0.9046816757443, "test_acc1": 79.85800000732422, "test_acc3": 92.33000002319336, "test_acc5": 94.82200001586914, "epoch": 2, "n_parameters": 25796584}
4
+ {"train_lr": 1.917868184760557e-05, "train_loss_cls": 3.0615568573145175, "train_grad_norm": 1.81360788582612, "test_flops": 3.5843811839999957, "test_layer_flops": 3.5355939840000015, "test_loss": 0.896313057870281, "test_acc1": 79.69800002197266, "test_acc3": 92.18600001831055, "test_acc5": 94.77000001464843, "epoch": 3, "n_parameters": 25796584}
5
+ {"train_lr": 1.818566144656129e-05, "train_loss_cls": 3.0649267470927164, "train_grad_norm": 1.823461710549087, "test_flops": 3.5843811839999957, "test_layer_flops": 3.5355939840000015, "test_loss": 0.8952216969764962, "test_acc1": 79.8820000177002, "test_acc3": 92.33000001037598, "test_acc5": 94.84400001586914, "epoch": 4, "n_parameters": 25796584}
6
+ {"train_lr": 1.6856740760408965e-05, "train_loss_cls": 3.0552061642173953, "train_grad_norm": 1.8355475519772628, "test_flops": 3.5843811839999957, "test_layer_flops": 3.5355939840000015, "test_loss": 0.8922132053697596, "test_acc1": 79.94400000610352, "test_acc3": 92.29600002319336, "test_acc5": 94.86400001831055, "epoch": 5, "n_parameters": 25796584}
7
+ {"train_lr": 1.5250000000000704e-05, "train_loss_cls": 3.051111589899833, "train_grad_norm": 1.8363814669833194, "test_flops": 3.5843811839999957, "test_layer_flops": 3.5355939840000015, "test_loss": 0.8875212704344672, "test_acc1": 79.94800000732423, "test_acc3": 92.44200002441406, "test_acc5": 94.93800001953124, "epoch": 6, "n_parameters": 25796584}
8
+ {"train_lr": 1.3435661446562572e-05, "train_loss_cls": 3.044103423980691, "train_grad_norm": 1.8571236432789804, "test_flops": 3.5843811839999957, "test_layer_flops": 3.5355939840000015, "test_loss": 0.8880173618696174, "test_acc1": 79.99800000854492, "test_acc3": 92.39800000915527, "test_acc5": 94.90400001586914, "epoch": 7, "n_parameters": 25796584}
9
+ {"train_lr": 1.1493020401043087e-05, "train_loss_cls": 3.0445323398859383, "train_grad_norm": 1.800949070808127, "test_flops": 3.5843811839999957, "test_layer_flops": 3.5355939840000015, "test_loss": 0.8898946596499608, "test_acc1": 79.93000000061035, "test_acc3": 92.37800001281738, "test_acc5": 94.87000001831055, "epoch": 8, "n_parameters": 25796584}
10
+ {"train_lr": 9.506979598956982e-06, "train_loss_cls": 3.031377558752025, "train_grad_norm": 1.802635353484409, "test_flops": 3.5843811839999957, "test_layer_flops": 3.5355939840000015, "test_loss": 0.8864300846293265, "test_acc1": 80.07000001281739, "test_acc3": 92.40000001159667, "test_acc5": 94.90600001586914, "epoch": 9, "n_parameters": 25796584}
11
+ {"train_lr": 7.564338553438001e-06, "train_loss_cls": 3.02635682183061, "train_grad_norm": 1.8023254900432224, "test_flops": 3.5843811839999957, "test_layer_flops": 3.5355939840000015, "test_loss": 0.8850315447364535, "test_acc1": 80.04600001525878, "test_acc3": 92.40400001403809, "test_acc5": 94.82400001831054, "epoch": 10, "n_parameters": 25796584}
12
+ {"train_lr": 5.750000000000171e-06, "train_loss_cls": 3.0174996326986454, "train_grad_norm": 1.821181112627903, "test_flops": 3.5843811839999957, "test_layer_flops": 3.5355939840000015, "test_loss": 0.8877719417214394, "test_acc1": 80.1220000189209, "test_acc3": 92.41600001159668, "test_acc5": 94.92200001708984, "epoch": 11, "n_parameters": 25796584}
13
+ {"train_lr": 4.143259239590925e-06, "train_loss_cls": 3.0274003962461324, "train_grad_norm": 1.8278975451497628, "test_flops": 3.5843811839999957, "test_layer_flops": 3.5355939840000015, "test_loss": 0.8835078300718142, "test_acc1": 80.11800001525879, "test_acc3": 92.43400002319336, "test_acc5": 94.90600001708984, "epoch": 12, "n_parameters": 25796584}
14
+ {"train_lr": 2.8143385534381205e-06, "train_loss_cls": 3.017869420331731, "train_grad_norm": 1.8065000613339894, "test_flops": 3.5843811839999957, "test_layer_flops": 3.5355939840000015, "test_loss": 0.8824080675080115, "test_acc1": 80.18400000488282, "test_acc3": 92.47800002197266, "test_acc5": 94.97200001708984, "epoch": 13, "n_parameters": 25796584}
15
+ {"train_lr": 1.8213181523952925e-06, "train_loss_cls": 3.008188795855196, "train_grad_norm": 1.8200004399537468, "test_flops": 3.5843811839999957, "test_layer_flops": 3.5355939840000015, "test_loss": 0.8822356097552241, "test_acc1": 80.14200001525879, "test_acc3": 92.52400001525879, "test_acc5": 94.93200001708985, "epoch": 14, "n_parameters": 25796584}
VideoMamba-S_15epochs_80.1/logs/log_rank0.txt ADDED
The diff for this file is too large to render. See raw diff
 
VideoMamba-Ti_30epoch_75.9/VideoMamba-Ti_30epoch_75.9.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2715ff72189e7f4ffac2e3c8a025bef9e20aa3ace80080521f88d8f9b1fe2f98
3
+ size 86281550
VideoMamba-Ti_30epoch_75.9/logs/log.txt ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"train_lr": 1.999999999999932e-05, "train_loss_cls": 4.109744407171063, "train_grad_norm": 2.568242895374481, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.0964893838580774, "test_acc1": 75.41400000671386, "test_acc3": 89.380000022583, "test_acc5": 92.67200000793457, "epoch": 0, "n_parameters": 7148008}
2
+ {"train_lr": 1.999999999999932e-05, "train_loss_cls": 4.0306456360123235, "train_grad_norm": 2.381616983720534, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.0970627754470523, "test_acc1": 75.41199999389649, "test_acc3": 89.4560000201416, "test_acc5": 92.68800000793458, "epoch": 1, "n_parameters": 7148008}
3
+ {"train_lr": 1.99479580059997e-05, "train_loss_cls": 4.009104005271773, "train_grad_norm": 2.353635536013938, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.101040245699031, "test_acc1": 75.39200000671387, "test_acc3": 89.4240000177002, "test_acc5": 92.76000000915528, "epoch": 2, "n_parameters": 7148008}
4
+ {"train_lr": 1.9792402206971924e-05, "train_loss_cls": 3.986015054914686, "train_grad_norm": 2.3335984574138022, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.0933334537762769, "test_acc1": 75.49599999633789, "test_acc3": 89.4800000164795, "test_acc5": 92.72600000549316, "epoch": 3, "n_parameters": 7148008}
5
+ {"train_lr": 1.9535036904804593e-05, "train_loss_cls": 3.985597376462272, "train_grad_norm": 2.327545544416975, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.0949311096753394, "test_acc1": 75.46200000671386, "test_acc3": 89.55600001525879, "test_acc5": 92.69600000671387, "epoch": 4, "n_parameters": 7148008}
6
+ {"train_lr": 1.917868184760557e-05, "train_loss_cls": 3.9686558759279196, "train_grad_norm": 2.3256065488623965, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.099917967404638, "test_acc1": 75.56800001464843, "test_acc3": 89.4580000177002, "test_acc5": 92.73400000549316, "epoch": 5, "n_parameters": 7148008}
7
+ {"train_lr": 1.8727241335952554e-05, "train_loss_cls": 3.962469764702993, "train_grad_norm": 2.325471501175067, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.0889167274747575, "test_acc1": 75.55800001831055, "test_acc3": 89.5400000201416, "test_acc5": 92.78400000549317, "epoch": 6, "n_parameters": 7148008}
8
+ {"train_lr": 1.818566144656129e-05, "train_loss_cls": 3.9501315558271157, "train_grad_norm": 2.3245107534882736, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.100563684440389, "test_acc1": 75.46800001586914, "test_acc3": 89.6180000189209, "test_acc5": 92.82400001037598, "epoch": 7, "n_parameters": 7148008}
9
+ {"train_lr": 1.7559875842035764e-05, "train_loss_cls": 3.955525519178926, "train_grad_norm": 2.3302258245950696, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.095063730542149, "test_acc1": 75.45200000671387, "test_acc3": 89.46400001647949, "test_acc5": 92.83000001586915, "epoch": 8, "n_parameters": 7148008}
10
+ {"train_lr": 1.6856740760408965e-05, "train_loss_cls": 3.9434123988107714, "train_grad_norm": 2.3384902300022774, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.0880026192385324, "test_acc1": 75.62800001586913, "test_acc3": 89.5520000164795, "test_acc5": 92.78600001586913, "epoch": 9, "n_parameters": 7148008}
11
+ {"train_lr": 1.6083959896779512e-05, "train_loss_cls": 3.9383803070973245, "train_grad_norm": 2.336742444861707, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.0959583830316455, "test_acc1": 75.51400001098632, "test_acc3": 89.53200001403809, "test_acc5": 92.72200000671387, "epoch": 10, "n_parameters": 7148008}
12
+ {"train_lr": 1.5250000000000704e-05, "train_loss_cls": 3.9276959155436804, "train_grad_norm": 2.3376349046838274, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.0919956136114743, "test_acc1": 75.47000001708984, "test_acc3": 89.5440000201416, "test_acc5": 92.80000000427246, "epoch": 11, "n_parameters": 7148008}
13
+ {"train_lr": 1.4363998109219696e-05, "train_loss_cls": 3.9388876454197437, "train_grad_norm": 2.3452561197997475, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.0941313277853995, "test_acc1": 75.48200000183105, "test_acc3": 89.56200001647949, "test_acc5": 92.85400000549316, "epoch": 12, "n_parameters": 7148008}
14
+ {"train_lr": 1.3435661446562572e-05, "train_loss_cls": 3.9281485223655794, "train_grad_norm": 2.3504986518102107, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.0883273709641428, "test_acc1": 75.64800001342773, "test_acc3": 89.55600001403809, "test_acc5": 92.78600000549316, "epoch": 13, "n_parameters": 7148008}
15
+ {"train_lr": 1.2475161062768665e-05, "train_loss_cls": 3.914257163671757, "train_grad_norm": 2.3513140677929307, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.091814335891787, "test_acc1": 75.56600000549317, "test_acc3": 89.5540000152588, "test_acc5": 92.78200000427246, "epoch": 14, "n_parameters": 7148008}
16
+ {"train_lr": 1.1493020401043087e-05, "train_loss_cls": 3.9153729249819293, "train_grad_norm": 2.3641840944187247, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.0904595927924525, "test_acc1": 75.63400001342774, "test_acc3": 89.55200001525878, "test_acc5": 92.85400000427246, "epoch": 15, "n_parameters": 7148008}
17
+ {"train_lr": 1.050000000000009e-05, "train_loss_cls": 3.908405305741788, "train_grad_norm": 2.3737196314820856, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.0809222719803149, "test_acc1": 75.72200000549316, "test_acc3": 89.6560000201416, "test_acc5": 92.92200000793457, "epoch": 16, "n_parameters": 7148008}
18
+ {"train_lr": 9.506979598956982e-06, "train_loss_cls": 3.913117215549536, "train_grad_norm": 2.379765828069356, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.0850850542893216, "test_acc1": 75.57000000183105, "test_acc3": 89.5700000164795, "test_acc5": 92.81800000671387, "epoch": 17, "n_parameters": 7148008}
19
+ {"train_lr": 8.52483893723146e-06, "train_loss_cls": 3.9145280947264056, "train_grad_norm": 2.3869640053414423, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.0872480298335454, "test_acc1": 75.70600000549317, "test_acc3": 89.5700000177002, "test_acc5": 92.91600000549316, "epoch": 18, "n_parameters": 7148008}
20
+ {"train_lr": 7.564338553438001e-06, "train_loss_cls": 3.9100575162161837, "train_grad_norm": 2.39755946726536, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.078853369139287, "test_acc1": 75.77999999145507, "test_acc3": 89.70400001281739, "test_acc5": 92.94600000549316, "epoch": 19, "n_parameters": 7148008}
21
+ {"train_lr": 6.636001890780014e-06, "train_loss_cls": 3.9031639891943866, "train_grad_norm": 2.3974741417155276, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.0862219706937974, "test_acc1": 75.71400000549316, "test_acc3": 89.7080000177002, "test_acc5": 92.90800000549316, "epoch": 20, "n_parameters": 7148008}
22
+ {"train_lr": 5.750000000000171e-06, "train_loss_cls": 3.9001541842850185, "train_grad_norm": 2.4070163077111246, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.0841803401708603, "test_acc1": 75.75200000549316, "test_acc3": 89.6380000177002, "test_acc5": 92.90000000549317, "epoch": 21, "n_parameters": 7148008}
23
+ {"train_lr": 4.91604010322163e-06, "train_loss_cls": 3.8866444147414536, "train_grad_norm": 2.4140999886057646, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.0858967406865285, "test_acc1": 75.88400000671386, "test_acc3": 89.67400001525878, "test_acc5": 92.93600000427246, "epoch": 22, "n_parameters": 7148008}
24
+ {"train_lr": 4.143259239590925e-06, "train_loss_cls": 3.8899570204180587, "train_grad_norm": 2.4234902347973306, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.084352230006943, "test_acc1": 75.84000001586914, "test_acc3": 89.76200001525879, "test_acc5": 92.91800000671387, "epoch": 23, "n_parameters": 7148008}
25
+ {"train_lr": 3.440124157964769e-06, "train_loss_cls": 3.8876271927766473, "train_grad_norm": 2.424770165976289, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.0810398569672692, "test_acc1": 75.76800000183106, "test_acc3": 89.71000001525879, "test_acc5": 92.92800000549316, "epoch": 24, "n_parameters": 7148008}
26
+ {"train_lr": 2.8143385534381205e-06, "train_loss_cls": 3.8833685080877407, "train_grad_norm": 2.4342222248049947, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.078636265682931, "test_acc1": 75.80000000305176, "test_acc3": 89.68000001525878, "test_acc5": 92.93800000549317, "epoch": 25, "n_parameters": 7148008}
27
+ {"train_lr": 2.272758664047819e-06, "train_loss_cls": 3.879055080796889, "train_grad_norm": 2.442805887316819, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.078783720655709, "test_acc1": 75.89400000915528, "test_acc3": 89.74400001647949, "test_acc5": 93.06200000671387, "epoch": 26, "n_parameters": 7148008}
28
+ {"train_lr": 1.8213181523952925e-06, "train_loss_cls": 3.892569375004795, "train_grad_norm": 2.4446983252593175, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.0778319844786002, "test_acc1": 75.85600000549316, "test_acc3": 89.70200001525879, "test_acc5": 92.97000000549316, "epoch": 27, "n_parameters": 7148008}
29
+ {"train_lr": 1.4649630951960854e-06, "train_loss_cls": 3.878857155545629, "train_grad_norm": 2.446654536645952, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.076435207484328, "test_acc1": 75.89000000427247, "test_acc3": 89.75400001770019, "test_acc5": 93.02600000549316, "epoch": 28, "n_parameters": 7148008}
30
+ {"train_lr": 1.207597793028789e-06, "train_loss_cls": 3.8902807979942033, "train_grad_norm": 2.451066364487298, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.0799574412557544, "test_acc1": 75.86600000793457, "test_acc3": 89.70000001403808, "test_acc5": 92.97000000549316, "epoch": 29, "n_parameters": 7148008}
VideoMamba-Ti_30epoch_75.9/logs/log_rank0.txt ADDED
The diff for this file is too large to render. See raw diff
 
Vim-B_15epochs_81.3/Vim-B_15epochs_81.3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:806d2719fb90369a42080289cc2d9b8e2d2ed28d141093930e3164b39c667163
3
+ size 1562230540
Vim-B_15epochs_81.3/logs/log.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"train_lr": 1.999999999999932e-05, "train_loss_cls": 2.886431756601345, "train_grad_norm": 2.090696767746783, "test_flops": 13.207011839999973, "test_layer_flops": 13.109437439999983, "test_loss": 0.9941253190746113, "test_acc1": 80.27600001220704, "test_acc3": 92.29400001647949, "test_acc5": 94.78400001281739, "epoch": 0, "n_parameters": 97598440}
2
+ {"train_lr": 1.999999999999932e-05, "train_loss_cls": 2.7957865701471682, "train_grad_norm": 1.8435214773165904, "test_flops": 13.207011839999973, "test_layer_flops": 13.109437439999983, "test_loss": 0.9719956600088246, "test_acc1": 80.36400000610351, "test_acc3": 92.37800001525879, "test_acc5": 94.90600001831055, "epoch": 1, "n_parameters": 97598440}
3
+ {"train_lr": 1.9792402206971924e-05, "train_loss_cls": 2.7844764190397675, "train_grad_norm": 1.7657440263304494, "test_flops": 13.207011839999973, "test_layer_flops": 13.109437439999983, "test_loss": 0.947865047853212, "test_acc1": 80.36000000854492, "test_acc3": 92.4980000164795, "test_acc5": 95.08600001953126, "epoch": 2, "n_parameters": 97598440}
4
+ {"train_lr": 1.917868184760557e-05, "train_loss_cls": 2.766332420835392, "train_grad_norm": 1.722887649143533, "test_flops": 13.207011839999973, "test_layer_flops": 13.109437439999983, "test_loss": 0.9236456138671053, "test_acc1": 80.49000000976562, "test_acc3": 92.61600001281738, "test_acc5": 95.07200001464844, "epoch": 3, "n_parameters": 97598440}
5
+ {"train_lr": 1.818566144656129e-05, "train_loss_cls": 2.7597595358447586, "train_grad_norm": 1.6959846973609771, "test_flops": 13.207011839999973, "test_layer_flops": 13.109437439999983, "test_loss": 0.9004414837655365, "test_acc1": 80.76800000854492, "test_acc3": 92.70200001159668, "test_acc5": 95.13000000549316, "epoch": 4, "n_parameters": 97598440}
6
+ {"train_lr": 1.6856740760408965e-05, "train_loss_cls": 2.7430951047382006, "train_grad_norm": 1.673333115476689, "test_flops": 13.207011839999973, "test_layer_flops": 13.109437439999983, "test_loss": 0.8791250031517477, "test_acc1": 80.88000000732421, "test_acc3": 92.80800001403809, "test_acc5": 95.19400001953125, "epoch": 5, "n_parameters": 97598440}
7
+ {"train_lr": 1.5250000000000704e-05, "train_loss_cls": 2.7344580098545905, "train_grad_norm": 1.6681355018314603, "test_flops": 13.207011839999973, "test_layer_flops": 13.109437439999983, "test_loss": 0.8692467336599924, "test_acc1": 80.88200000610351, "test_acc3": 92.90400001281738, "test_acc5": 95.31000000549317, "epoch": 6, "n_parameters": 97598440}
8
+ {"train_lr": 1.3435661446562572e-05, "train_loss_cls": 2.720739315215537, "train_grad_norm": 1.668292485219207, "test_flops": 13.207011839999973, "test_layer_flops": 13.109437439999983, "test_loss": 0.8542824472319714, "test_acc1": 80.98999999694824, "test_acc3": 92.94800001403809, "test_acc5": 95.28000000549316, "epoch": 7, "n_parameters": 97598440}
9
+ {"train_lr": 1.1493020401043087e-05, "train_loss_cls": 2.7185177715371647, "train_grad_norm": 1.6554693152769198, "test_flops": 13.207011839999973, "test_layer_flops": 13.109437439999983, "test_loss": 0.8421520494988987, "test_acc1": 81.12200000854492, "test_acc3": 92.99200001281739, "test_acc5": 95.33400000671386, "epoch": 8, "n_parameters": 97598440}
10
+ {"train_lr": 9.506979598956982e-06, "train_loss_cls": 2.704304024076862, "train_grad_norm": 1.6396371118075175, "test_flops": 13.207011839999973, "test_layer_flops": 13.109437439999983, "test_loss": 0.8370836463053616, "test_acc1": 81.1760000189209, "test_acc3": 92.98400001281739, "test_acc5": 95.31200000793457, "epoch": 9, "n_parameters": 97598440}
11
+ {"train_lr": 7.564338553438001e-06, "train_loss_cls": 2.696352439842445, "train_grad_norm": 1.6340051233816109, "test_flops": 13.207011839999973, "test_layer_flops": 13.109437439999983, "test_loss": 0.826532296570284, "test_acc1": 81.27200000610351, "test_acc3": 93.03000001403808, "test_acc5": 95.35200001953125, "epoch": 10, "n_parameters": 97598440}
12
+ {"train_lr": 5.750000000000171e-06, "train_loss_cls": 2.6865324449243784, "train_grad_norm": 1.6216552582004373, "test_flops": 13.207011839999973, "test_layer_flops": 13.109437439999983, "test_loss": 0.8225263446493416, "test_acc1": 81.21800000732422, "test_acc3": 93.07600001403809, "test_acc5": 95.39000000793457, "epoch": 11, "n_parameters": 97598440}
13
+ {"train_lr": 4.143259239590925e-06, "train_loss_cls": 2.693540482307605, "train_grad_norm": 1.6365385837882733, "test_flops": 13.207011839999973, "test_layer_flops": 13.109437439999983, "test_loss": 0.8173249365419758, "test_acc1": 81.2560000189209, "test_acc3": 93.08800001281739, "test_acc5": 95.40800000671386, "epoch": 12, "n_parameters": 97598440}
14
+ {"train_lr": 2.8143385534381205e-06, "train_loss_cls": 2.683338670981683, "train_grad_norm": 1.610735275667253, "test_flops": 13.207011839999973, "test_layer_flops": 13.109437439999983, "test_loss": 0.8122848117610022, "test_acc1": 81.29200000976563, "test_acc3": 93.09600001159669, "test_acc5": 95.42600000671386, "epoch": 13, "n_parameters": 97598440}
15
+ {"train_lr": 1.8213181523952925e-06, "train_loss_cls": 2.6734757268076224, "train_grad_norm": 1.6066600632229202, "test_flops": 13.207011839999973, "test_layer_flops": 13.109437439999983, "test_loss": 0.8085130040666887, "test_acc1": 81.26399999450683, "test_acc3": 93.08800001281739, "test_acc5": 95.42200000671387, "epoch": 14, "n_parameters": 97598440}
Vim-B_15epochs_81.3/logs/log_rank0.txt ADDED
The diff for this file is too large to render. See raw diff
 
Vim-S_15epoch_80.0/Vim-S_15epoch_80.0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:764695db70b116bbe7a4700ea402696db2040ec3a9feac6b2c2aa896cf0bef5e
3
+ size 310064782
Vim-S_15epoch_80.0/logs/log.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"train_lr": 1.999999999999932e-05, "train_loss_cls": 3.271263865424956, "train_grad_norm": 3.77395350651013, "test_flops": 3.5843811839999957, "test_layer_flops": 3.5355939840000015, "test_loss": 0.9494853053926205, "test_acc1": 78.9979999963379, "test_acc3": 91.6280000201416, "test_acc5": 94.38000002197266, "epoch": 0, "n_parameters": 25796584}
2
+ {"train_lr": 1.999999999999932e-05, "train_loss_cls": 3.1850958590170175, "train_grad_norm": 3.269430468027159, "test_flops": 3.5843811839999957, "test_layer_flops": 3.5355939840000015, "test_loss": 0.9403691373157258, "test_acc1": 79.13600001037598, "test_acc3": 91.68800002258301, "test_acc5": 94.33000000915527, "epoch": 1, "n_parameters": 25796584}
3
+ {"train_lr": 1.9792402206971924e-05, "train_loss_cls": 3.1782645015455455, "train_grad_norm": 3.159058173831037, "test_flops": 3.5843811839999957, "test_layer_flops": 3.5355939840000015, "test_loss": 0.9357890171207943, "test_acc1": 79.25000000549316, "test_acc3": 91.6980000177002, "test_acc5": 94.48200000671386, "epoch": 2, "n_parameters": 25796584}
4
+ {"train_lr": 1.917868184760557e-05, "train_loss_cls": 3.161882351997564, "train_grad_norm": 3.0595827419980823, "test_flops": 3.5843811839999957, "test_layer_flops": 3.5355939840000015, "test_loss": 0.9236160915116874, "test_acc1": 79.23200001220704, "test_acc3": 91.8400000152588, "test_acc5": 94.58200000427246, "epoch": 3, "n_parameters": 25796584}
5
+ {"train_lr": 1.818566144656129e-05, "train_loss_cls": 3.163294707961696, "train_grad_norm": 3.125807422289936, "test_flops": 3.5843811839999957, "test_layer_flops": 3.5355939840000015, "test_loss": 0.9405236551348044, "test_acc1": 79.22800001159668, "test_acc3": 91.8080000177002, "test_acc5": 94.50200001831055, "epoch": 4, "n_parameters": 25796584}
6
+ {"train_lr": 1.6856740760408965e-05, "train_loss_cls": 3.1424902381299495, "train_grad_norm": 3.02753069675226, "test_flops": 3.5843811839999957, "test_layer_flops": 3.5355939840000015, "test_loss": 0.9278158303730342, "test_acc1": 79.49400001159668, "test_acc3": 91.87800001159668, "test_acc5": 94.59200001342774, "epoch": 5, "n_parameters": 25796584}
7
+ {"train_lr": 1.5250000000000704e-05, "train_loss_cls": 3.1334562540459308, "train_grad_norm": 3.0376962509086662, "test_flops": 3.5843811839999957, "test_layer_flops": 3.5355939840000015, "test_loss": 0.917939904392982, "test_acc1": 79.48400001220703, "test_acc3": 91.9920000152588, "test_acc5": 94.65600001708984, "epoch": 6, "n_parameters": 25796584}
8
+ {"train_lr": 1.3435661446562572e-05, "train_loss_cls": 3.1188629552257434, "train_grad_norm": 3.017434767920146, "test_flops": 3.5843811839999957, "test_layer_flops": 3.5355939840000015, "test_loss": 0.9244503983277448, "test_acc1": 79.4540000177002, "test_acc3": 91.96200001403808, "test_acc5": 94.63800001708984, "epoch": 7, "n_parameters": 25796584}
9
+ {"train_lr": 1.1493020401043087e-05, "train_loss_cls": 3.1183869554746826, "train_grad_norm": 3.0714923731333537, "test_flops": 3.5843811839999957, "test_layer_flops": 3.5355939840000015, "test_loss": 0.9178109774936218, "test_acc1": 79.61800000549316, "test_acc3": 91.99800001525878, "test_acc5": 94.68600001708984, "epoch": 8, "n_parameters": 25796584}
10
+ {"train_lr": 9.506979598956982e-06, "train_loss_cls": 3.1000627765147617, "train_grad_norm": 3.0653168835895332, "test_flops": 3.5843811839999957, "test_layer_flops": 3.5355939840000015, "test_loss": 0.9183333830109665, "test_acc1": 79.68999999938964, "test_acc3": 92.08600002197265, "test_acc5": 94.68000001464844, "epoch": 9, "n_parameters": 25796584}
11
+ {"train_lr": 7.564338553438001e-06, "train_loss_cls": 3.0907045940963105, "train_grad_norm": 3.0096466279715943, "test_flops": 3.5843811839999957, "test_layer_flops": 3.5355939840000015, "test_loss": 0.9157353293682847, "test_acc1": 79.78399999694824, "test_acc3": 92.09200001403809, "test_acc5": 94.73800001464843, "epoch": 10, "n_parameters": 25796584}
12
+ {"train_lr": 5.750000000000171e-06, "train_loss_cls": 3.0778871272250616, "train_grad_norm": 3.079953721148981, "test_flops": 3.5843811839999957, "test_layer_flops": 3.5355939840000015, "test_loss": 0.9130085582027629, "test_acc1": 79.75800000183105, "test_acc3": 92.17800001403809, "test_acc5": 94.72600001831054, "epoch": 11, "n_parameters": 25796584}
13
+ {"train_lr": 4.143259239590925e-06, "train_loss_cls": 3.083729615850414, "train_grad_norm": 3.068265227009829, "test_flops": 3.5843811839999957, "test_layer_flops": 3.5355939840000015, "test_loss": 0.909653323569468, "test_acc1": 79.75799999938965, "test_acc3": 92.1820000189209, "test_acc5": 94.84800001953126, "epoch": 12, "n_parameters": 25796584}
14
+ {"train_lr": 2.8143385534381205e-06, "train_loss_cls": 3.0722520787962715, "train_grad_norm": 3.0578678989772508, "test_flops": 3.5843811839999957, "test_layer_flops": 3.5355939840000015, "test_loss": 0.9035565682819912, "test_acc1": 79.95199999694825, "test_acc3": 92.24000001525879, "test_acc5": 94.78000001953124, "epoch": 13, "n_parameters": 25796584}
15
+ {"train_lr": 1.8213181523952925e-06, "train_loss_cls": 3.0596308719387633, "train_grad_norm": 3.0248222887087213, "test_flops": 3.5843811839999957, "test_layer_flops": 3.5355939840000015, "test_loss": 0.9073043323445077, "test_acc1": 79.87399999938965, "test_acc3": 92.2160000164795, "test_acc5": 94.77400001831055, "epoch": 14, "n_parameters": 25796584}
Vim-S_15epoch_80.0/logs/log_rank0.txt ADDED
The diff for this file is too large to render. See raw diff
 
Vim-Ti_30epoch_75.3/Vim-Ti_30epoch_75.3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ba2c2031c11fbcdc0a85188335777bcfb3127e0ac979a45b0098c7ecf850218
3
+ size 86281550
Vim-Ti_30epoch_75.3/logs/log.txt ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"train_lr": 1.999999999999932e-05, "train_loss_cls": 4.14430966234798, "train_grad_norm": 5.109803345658891, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.1930296838435592, "test_acc1": 73.8520000012207, "test_acc3": 88.4460000238037, "test_acc5": 91.81800001281738, "epoch": 0, "n_parameters": 7148008}
2
+ {"train_lr": 1.999999999999932e-05, "train_loss_cls": 3.927368625200433, "train_grad_norm": 4.4985829662266585, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.188682764008337, "test_acc1": 73.8719999987793, "test_acc3": 88.3480000238037, "test_acc5": 91.88600002563477, "epoch": 1, "n_parameters": 7148008}
3
+ {"train_lr": 1.99479580059997e-05, "train_loss_cls": 3.9911425334277104, "train_grad_norm": 4.423578583746314, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.183570134016324, "test_acc1": 73.90800000793458, "test_acc3": 88.3420000201416, "test_acc5": 91.80600001953125, "epoch": 2, "n_parameters": 7148008}
4
+ {"train_lr": 1.9792402206971924e-05, "train_loss_cls": 3.966443438109734, "train_grad_norm": 4.392660962973091, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.175546608409103, "test_acc1": 74.01200000671386, "test_acc3": 88.49800000488281, "test_acc5": 91.97600001831054, "epoch": 3, "n_parameters": 7148008}
5
+ {"train_lr": 1.9535036904804593e-05, "train_loss_cls": 3.846440093337203, "train_grad_norm": 4.364879900698277, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.1816388898510106, "test_acc1": 73.93200001159668, "test_acc3": 88.41200002380371, "test_acc5": 91.91800002319336, "epoch": 4, "n_parameters": 7148008}
6
+ {"train_lr": 1.917868184760557e-05, "train_loss_cls": 3.9560511340912012, "train_grad_norm": 4.322937359722207, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.1612164982575544, "test_acc1": 74.06400000549317, "test_acc3": 88.47000001525879, "test_acc5": 91.92400001953125, "epoch": 5, "n_parameters": 7148008}
7
+ {"train_lr": 1.8727241335952554e-05, "train_loss_cls": 3.9402438203255525, "train_grad_norm": 4.30677681870693, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.1780387234626983, "test_acc1": 74.02600001403809, "test_acc3": 88.44400001403808, "test_acc5": 91.92800000793457, "epoch": 6, "n_parameters": 7148008}
8
+ {"train_lr": 1.818566144656129e-05, "train_loss_cls": 3.9229564683424005, "train_grad_norm": 4.2969196935732965, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.1700888252531996, "test_acc1": 74.13999999633789, "test_acc3": 88.58800001403809, "test_acc5": 92.01800001831054, "epoch": 7, "n_parameters": 7148008}
9
+ {"train_lr": 1.7559875842035764e-05, "train_loss_cls": 3.9274431976149504, "train_grad_norm": 4.292222131070473, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.1686362335268332, "test_acc1": 74.178, "test_acc3": 88.51600002258301, "test_acc5": 91.89400001159667, "epoch": 8, "n_parameters": 7148008}
10
+ {"train_lr": 1.6856740760408965e-05, "train_loss_cls": 3.908019540407103, "train_grad_norm": 4.2476892316941735, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.1660003172499793, "test_acc1": 74.40600000671387, "test_acc3": 88.5560000177002, "test_acc5": 92.02400001953124, "epoch": 9, "n_parameters": 7148008}
11
+ {"train_lr": 1.6083959896779512e-05, "train_loss_cls": 3.9026558477100997, "train_grad_norm": 4.297512572732189, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.1618987537008159, "test_acc1": 74.3260000012207, "test_acc3": 88.63800001525878, "test_acc5": 92.06600000671386, "epoch": 10, "n_parameters": 7148008}
12
+ {"train_lr": 1.5250000000000704e-05, "train_loss_cls": 3.8924265496498296, "train_grad_norm": 4.314868252721431, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.1571638431324034, "test_acc1": 74.30000000122071, "test_acc3": 88.6520000189209, "test_acc5": 92.04200001831055, "epoch": 11, "n_parameters": 7148008}
13
+ {"train_lr": 1.4363998109219696e-05, "train_loss_cls": 3.889358990984283, "train_grad_norm": 4.318287301978333, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.1583176953908132, "test_acc1": 74.35, "test_acc3": 88.6360000189209, "test_acc5": 92.18000001586914, "epoch": 12, "n_parameters": 7148008}
14
+ {"train_lr": 1.3435661446562572e-05, "train_loss_cls": 3.8802417990305633, "train_grad_norm": 4.323365069312348, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.1457149785544192, "test_acc1": 74.48600001708985, "test_acc3": 88.7780000177002, "test_acc5": 92.23000000671387, "epoch": 13, "n_parameters": 7148008}
15
+ {"train_lr": 1.2475161062768665e-05, "train_loss_cls": 3.8714121954141856, "train_grad_norm": 4.31851294429468, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.1641017896484356, "test_acc1": 74.3079999975586, "test_acc3": 88.6900000238037, "test_acc5": 92.10200000915528, "epoch": 14, "n_parameters": 7148008}
16
+ {"train_lr": 1.1493020401043087e-05, "train_loss_cls": 3.862408112517173, "train_grad_norm": 4.315611733997659, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.1550452824758024, "test_acc1": 74.56200000122071, "test_acc3": 88.7720000201416, "test_acc5": 92.20600001831055, "epoch": 15, "n_parameters": 7148008}
17
+ {"train_lr": 1.050000000000009e-05, "train_loss_cls": 3.7835851827923723, "train_grad_norm": 4.335505924922385, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.1477052493363011, "test_acc1": 74.60400000915527, "test_acc3": 88.77000000732421, "test_acc5": 92.21800002197266, "epoch": 16, "n_parameters": 7148008}
18
+ {"train_lr": 9.506979598956982e-06, "train_loss_cls": 3.861541086725956, "train_grad_norm": 4.40322476229984, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.15299635494546, "test_acc1": 74.5439999975586, "test_acc3": 88.83400001770019, "test_acc5": 92.25200000793457, "epoch": 17, "n_parameters": 7148008}
19
+ {"train_lr": 8.52483893723146e-06, "train_loss_cls": 3.854324943155979, "train_grad_norm": 4.395158407118299, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.1542752610937672, "test_acc1": 74.7040000012207, "test_acc3": 88.9720000189209, "test_acc5": 92.28000001831055, "epoch": 18, "n_parameters": 7148008}
20
+ {"train_lr": 7.564338553438001e-06, "train_loss_cls": 3.8414510347955613, "train_grad_norm": 4.398114434820856, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.1410690066309608, "test_acc1": 74.728, "test_acc3": 88.93200001647949, "test_acc5": 92.31800001831054, "epoch": 19, "n_parameters": 7148008}
21
+ {"train_lr": 6.636001890780014e-06, "train_loss_cls": 3.841975244686758, "train_grad_norm": 4.418613755064522, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.142587496461917, "test_acc1": 74.93799999511718, "test_acc3": 88.91200001892089, "test_acc5": 92.32200001708985, "epoch": 20, "n_parameters": 7148008}
22
+ {"train_lr": 5.750000000000171e-06, "train_loss_cls": 3.8285304533777764, "train_grad_norm": 4.445215531295057, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.1360707917839896, "test_acc1": 74.87400000793457, "test_acc3": 89.0820000164795, "test_acc5": 92.35000001831055, "epoch": 21, "n_parameters": 7148008}
23
+ {"train_lr": 4.91604010322163e-06, "train_loss_cls": 3.8266869187116814, "train_grad_norm": 4.487750900258645, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.1367687370095934, "test_acc1": 75.0399999987793, "test_acc3": 89.09200001525879, "test_acc5": 92.40600001831055, "epoch": 22, "n_parameters": 7148008}
24
+ {"train_lr": 4.143259239590925e-06, "train_loss_cls": 3.8178302178518186, "train_grad_norm": 4.470582633757953, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.1374168802736973, "test_acc1": 74.9839999975586, "test_acc3": 89.10600001525879, "test_acc5": 92.34800001831054, "epoch": 23, "n_parameters": 7148008}
25
+ {"train_lr": 3.440124157964769e-06, "train_loss_cls": 3.8227040284066844, "train_grad_norm": 4.497257135850158, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.1352581352147522, "test_acc1": 75.01599999511718, "test_acc3": 89.0140000189209, "test_acc5": 92.45800000793457, "epoch": 24, "n_parameters": 7148008}
26
+ {"train_lr": 2.8143385534381205e-06, "train_loss_cls": 3.813551417572035, "train_grad_norm": 4.515210981849286, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.13080732524395, "test_acc1": 75.00199999511719, "test_acc3": 89.10400001647949, "test_acc5": 92.42400001708984, "epoch": 25, "n_parameters": 7148008}
27
+ {"train_lr": 2.272758664047819e-06, "train_loss_cls": 3.8102502877430187, "train_grad_norm": 4.5449921857062385, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.1320936688202985, "test_acc1": 75.09999999389649, "test_acc3": 89.0720000152588, "test_acc5": 92.43200001953124, "epoch": 26, "n_parameters": 7148008}
28
+ {"train_lr": 1.8213181523952925e-06, "train_loss_cls": 3.8041823888234765, "train_grad_norm": 4.546170520172607, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.1264575120745872, "test_acc1": 75.1799999987793, "test_acc3": 89.16800001525878, "test_acc5": 92.43600001586914, "epoch": 27, "n_parameters": 7148008}
29
+ {"train_lr": 1.4649630951960854e-06, "train_loss_cls": 3.815477527136044, "train_grad_norm": 4.5590389665844535, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.1258087288968417, "test_acc1": 75.1399999987793, "test_acc3": 89.1740000164795, "test_acc5": 92.52000001831054, "epoch": 28, "n_parameters": 7148008}
30
+ {"train_lr": 1.207597793028789e-06, "train_loss_cls": 3.806694619732795, "train_grad_norm": 4.549412622916804, "test_flops": 1.2845387520000022, "test_layer_flops": 1.2508439039999966, "test_loss": 1.126151869263576, "test_acc1": 75.25399999267579, "test_acc3": 89.14200001403809, "test_acc5": 92.50600001586913, "epoch": 29, "n_parameters": 7148008}
Vim-Ti_30epoch_75.3/logs/log_rank0.txt ADDED
The diff for this file is too large to render. See raw diff