adamkarvonen commited on
Commit
f997cba
·
verified ·
1 Parent(s): d1b5949

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. matryoshka_othello/f1_results.csv +0 -0
  2. matryoshka_othello/results.csv +161 -0
  3. matryoshka_othello/trainer_0/ae.pt +3 -0
  4. matryoshka_othello/trainer_0/config.json +53 -0
  5. matryoshka_othello/trainer_0/results.csv +5 -0
  6. matryoshka_othello/trainer_1/ae.pt +3 -0
  7. matryoshka_othello/trainer_1/config.json +53 -0
  8. matryoshka_othello/trainer_1/results.csv +5 -0
  9. matryoshka_othello/trainer_10/ae.pt +3 -0
  10. matryoshka_othello/trainer_10/config.json +53 -0
  11. matryoshka_othello/trainer_10/results.csv +5 -0
  12. matryoshka_othello/trainer_11/ae.pt +3 -0
  13. matryoshka_othello/trainer_11/config.json +53 -0
  14. matryoshka_othello/trainer_11/results.csv +5 -0
  15. matryoshka_othello/trainer_12/ae.pt +3 -0
  16. matryoshka_othello/trainer_12/config.json +53 -0
  17. matryoshka_othello/trainer_12/results.csv +5 -0
  18. matryoshka_othello/trainer_13/ae.pt +3 -0
  19. matryoshka_othello/trainer_13/config.json +53 -0
  20. matryoshka_othello/trainer_13/results.csv +5 -0
  21. matryoshka_othello/trainer_14/ae.pt +3 -0
  22. matryoshka_othello/trainer_14/config.json +53 -0
  23. matryoshka_othello/trainer_14/results.csv +5 -0
  24. matryoshka_othello/trainer_15/ae.pt +3 -0
  25. matryoshka_othello/trainer_15/config.json +53 -0
  26. matryoshka_othello/trainer_15/results.csv +5 -0
  27. matryoshka_othello/trainer_16/ae.pt +3 -0
  28. matryoshka_othello/trainer_16/config.json +53 -0
  29. matryoshka_othello/trainer_16/results.csv +5 -0
  30. matryoshka_othello/trainer_17/ae.pt +3 -0
  31. matryoshka_othello/trainer_17/config.json +53 -0
  32. matryoshka_othello/trainer_17/results.csv +5 -0
  33. matryoshka_othello/trainer_18/ae.pt +3 -0
  34. matryoshka_othello/trainer_18/config.json +53 -0
  35. matryoshka_othello/trainer_18/results.csv +5 -0
  36. matryoshka_othello/trainer_19/ae.pt +3 -0
  37. matryoshka_othello/trainer_19/config.json +53 -0
  38. matryoshka_othello/trainer_19/results.csv +5 -0
  39. matryoshka_othello/trainer_2/ae.pt +3 -0
  40. matryoshka_othello/trainer_2/config.json +53 -0
  41. matryoshka_othello/trainer_2/results.csv +5 -0
  42. matryoshka_othello/trainer_20/ae.pt +3 -0
  43. matryoshka_othello/trainer_20/config.json +53 -0
  44. matryoshka_othello/trainer_20/results.csv +5 -0
  45. matryoshka_othello/trainer_21/ae.pt +3 -0
  46. matryoshka_othello/trainer_21/config.json +53 -0
  47. matryoshka_othello/trainer_21/results.csv +5 -0
  48. matryoshka_othello/trainer_22/ae.pt +3 -0
  49. matryoshka_othello/trainer_22/config.json +53 -0
  50. matryoshka_othello/trainer_22/results.csv +5 -0
matryoshka_othello/f1_results.csv ADDED
The diff for this file is too large to render. See raw diff
 
matryoshka_othello/results.csv ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Unnamed: 0,autoencoder_group_path,autoencoder_path,reconstruction_file,trainer_class,sae_class,eval_sae_n_inputs,eval_results_n_inputs,board_reconstruction_n_inputs,l0,l1_loss,l2_loss,frac_alive,frac_variance_explained,cossim,l2_ratio,loss_original,loss_reconstructed,loss_zero,frac_recovered,num_alive_features,games_batch_to_state_stack_mine_yours_BLRRC_board_reconstruction_board_count,games_batch_to_state_stack_mine_yours_BLRRC_num_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_idx,games_batch_to_state_stack_mine_yours_BLRRC_zero_L0,games_batch_to_state_stack_mine_yours_BLRRC_best_L0,games_batch_to_state_stack_mine_yours_BLRRC_zero_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_best_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_last_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_true_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_true_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_false_negative_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_false_negative_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_multiple_classes,games_batch_to_state_stack_mine_yours_BLRRC_best_multiple_classes,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_high_precision_counts_per_T,games_batch_to_state_stack_mine_yours_BLRRC_high_precision_and_recall_counts_per_T,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_board_reconstruction_board_count,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_num_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_idx,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_L0,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_L0,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_last_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_true_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_true_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_false_negative_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_false_negative_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_multiple_classes,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_multiple_classes,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_high_precision_counts_per_T,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_high_precision_and_recall_counts_per_T,games_batch_to_valid_moves_BLRRC_board_reconstruction_board_count,games_batch_to_valid_moves_BLRRC_num_squares,games_batch_to_valid_moves_BLRRC_best_idx,games_batch_to_valid_moves_BLRRC_zero_L0,games_batch_to_valid_moves_BLRRC_best_L0,games_batch_to_valid_moves_BLRRC_zero_f1_score_per_class,games_batch_to_valid_moves_BLRRC_best_f1_score_per_class,games_batch_to_valid_moves_BLRRC_last_f1_score_per_class,games_batch_to_valid_moves_BLRRC_zero_num_true_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_true_positive_squares,games_batch_to_valid_moves_BLRRC_zero_num_false_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_false_positive_squares,games_batch_to_valid_moves_BLRRC_zero_num_false_negative_squares,games_batch_to_valid_moves_BLRRC_best_num_false_negative_squares,games_batch_to_valid_moves_BLRRC_zero_multiple_classes,games_batch_to_valid_moves_BLRRC_best_multiple_classes,games_batch_to_valid_moves_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_valid_moves_BLRRC_high_precision_counts_per_T,games_batch_to_valid_moves_BLRRC_high_precision_and_recall_counts_per_T
2
+ 0,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_0/,autoencoders/matryoshka_othello/trainer_0/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,9.920000076293944,54.032108306884766,12.195414543151855,0.0478515625,0.6925392150878906,0.8403303623199463,0.8648781180381775,2.054067611694336,2.5528128147125244,4.913147449493408,0.8255574703216553,2502,59000,3776000,0,10.004157066345217,10.004157066345217,0.6467792987823486,0.6467792987823486,0.7024305462837219,1826214,1826214,44887,44887,1949786,1949786,15201,15201,15119,15119,"tensor([18648, 18648, 18648, 18665, 18778, 19179, 19587, 20977, 21130, 15219,
3
+ 2861], device='cuda:0')","tensor([1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0], device='cuda:0')",59000,2006000,0,10.004157066345217,10.004157066345217,0.5270017385482788,0.5270017385482788,0.5584830641746521,722269,722269,12781,12781,1283731,1283731,3684,3684,3674,3674,"tensor([5886, 5886, 5886, 5889, 5909, 6019, 6171, 6596, 6358, 3406, 278],
4
+ device='cuda:0')","tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], device='cuda:0')",59000,506839,0,10.004157066345217,10.004157066345217,0.5241131782531738,0.5241131782531738,0.607089102268219,180916,180916,2615,2615,325923,325923,0,0,0,0,"tensor([611, 611, 611, 611, 615, 657, 712, 839, 921, 790, 179],
5
+ device='cuda:0')","tensor([31, 31, 31, 31, 32, 33, 31, 37, 39, 38, 10], device='cuda:0')"
6
+ 1,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_1/,autoencoders/matryoshka_othello/trainer_1/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,30.31999969482422,113.8661117553711,8.712231636047363,0.092529296875,0.841371476650238,0.9205113649368286,0.931857407093048,2.054067611694336,2.059459686279297,4.913147449493408,0.998114049434662,1501,59000,3776000,1,29.97489547729492,29.96699905395508,0.7845427393913269,0.7854730486869812,0.8259817957878113,2470892,2476241,52043,52854,1305108,1299759,23814,24623,23594,24399,"tensor([11563, 11567, 11576, 11661, 11942, 12713, 13935, 15006, 15493, 13308,
7
+ 3167], device='cuda:0')","tensor([12, 12, 12, 12, 11, 9, 7, 3, 0, 0, 0], device='cuda:0')",59000,2006000,2,29.97489547729492,29.80812263488769,0.7183125615119934,0.7183449268341064,0.7515202164649963,1138210,1138225,24912,24796,867790,867775,8102,8062,8062,8022,"tensor([2389, 2389, 2391, 2433, 2571, 2781, 3069, 3256, 3104, 1964, 286],
8
+ device='cuda:0')","tensor([2, 2, 2, 2, 1, 0, 0, 0, 0, 0, 0], device='cuda:0')",59000,506839,4,29.97489547729492,25.32160758972168,0.5449132323265076,0.5775192379951477,0.8116542100906372,190988,206626,3158,2099,315851,300213,0,0,0,0,"tensor([224, 224, 224, 236, 262, 324, 388, 461, 504, 509, 172],
9
+ device='cuda:0')","tensor([44, 44, 44, 42, 42, 44, 28, 31, 30, 24, 6], device='cuda:0')"
10
+ 2,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_2/,autoencoders/matryoshka_othello/trainer_2/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,52.63999938964844,160.45501708984375,6.988826751708984,0.11962890625,0.8998516798019409,0.9507719278335572,0.9625293016433716,2.054067611694336,2.0564510822296143,4.913147449493408,0.9991663694381714,1541,59000,3776000,2,50.94383239746094,47.7843132019043,0.7862502336502075,0.793076753616333,0.8443553447723389,2494429,2524621,74699,66029,1281571,1251379,37813,33972,37368,33599,"tensor([ 7562, 7619, 7944, 8424, 9375, 10583, 12019, 13344, 14409, 11636,
11
+ 2879], device='cuda:0')","tensor([43, 43, 43, 44, 33, 22, 13, 8, 0, 0, 0], device='cuda:0')",59000,2006000,3,50.94383239746094,42.56414031982422,0.7287904024124146,0.7426672577857971,0.7913922667503357,1171567,1195516,37533,18004,834433,810484,13787,4853,13776,4844,"tensor([1832, 1832, 1860, 1976, 2229, 2494, 2753, 2872, 2661, 1522, 262],
12
+ device='cuda:0')","tensor([32, 32, 32, 33, 22, 11, 4, 0, 0, 0, 0], device='cuda:0')",59000,506839,3,50.94383239746094,42.56414031982422,0.3663000166416168,0.5692099332809448,0.7786582708358765,114553,202610,4068,2450,392286,304229,0,0,0,0,"tensor([142, 142, 159, 185, 230, 280, 339, 371, 423, 424, 170],
13
+ device='cuda:0')","tensor([24, 24, 37, 41, 36, 29, 21, 19, 22, 23, 6], device='cuda:0')"
14
+ 3,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_3/,autoencoders/matryoshka_othello/trainer_3/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,72.79999542236328,194.2470703125,5.881448745727539,0.145751953125,0.9286208152770996,0.9651238918304444,0.9737438559532166,2.054067611694336,2.055142879486084,4.913147449493408,0.9996238946914672,1698,59000,3776000,2,70.99925994873047,62.69881820678711,0.7420945763587952,0.7985299825668335,0.8537569642066956,2274757,2554768,79882,67910,1501243,1221232,31501,34386,31071,34029,"tensor([ 6999, 7025, 7271, 7774, 9084, 10511, 12059, 13793, 14535, 11419,
15
+ 1995], device='cuda:0')","tensor([52, 52, 72, 74, 58, 31, 13, 9, 0, 0, 0], device='cuda:0')",59000,2006000,3,70.99925994873047,52.67372512817383,0.6254706978797913,0.7599217295646667,0.8068196177482605,929197,1240804,35996,18806,1076803,765196,6517,4506,6504,4501,"tensor([1640, 1640, 1731, 1925, 2237, 2551, 2834, 3027, 2750, 1516, 194],
16
+ device='cuda:0')","tensor([35, 35, 55, 59, 47, 20, 3, 0, 0, 0, 0], device='cuda:0')",59000,506839,3,70.99925994873047,52.67372512817383,0.2024037837982177,0.618309736251831,0.7781156301498413,57249,228633,1603,4070,449590,278206,0,0,0,0,"tensor([122, 123, 151, 186, 254, 293, 329, 378, 416, 388, 123],
17
+ device='cuda:0')","tensor([10, 11, 36, 46, 33, 29, 20, 18, 20, 22, 5], device='cuda:0')"
18
+ 4,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_4/,autoencoders/matryoshka_othello/trainer_4/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,95.91999816894533,231.90640258789065,5.228255271911621,0.178466796875,0.9441828727722168,0.9726977944374084,0.9836472272872924,2.054067611694336,2.054480791091919,4.913147449493408,0.9998554587364196,1899,59000,3776000,3,91.89859771728516,60.149044036865234,0.6312516927719116,0.7936646342277527,0.8530160188674927,1772709,2522578,67780,58208,2003291,1253422,17315,29739,17096,29639,"tensor([ 7983, 8068, 8525, 9345, 10675, 12335, 13892, 15822, 16716, 12552,
19
+ 1776], device='cuda:0')","tensor([24, 30, 70, 78, 66, 36, 13, 8, 0, 0, 0], device='cuda:0')",59000,2006000,3,91.89859771728516,60.149044036865234,0.3572663366794586,0.7488115429878235,0.8073451519012451,438787,1211191,11571,17778,1567213,794809,994,4731,992,4729,"tensor([1753, 1770, 1941, 2227, 2549, 2844, 3161, 3272, 2873, 1653, 179],
20
+ device='cuda:0')","tensor([ 7, 13, 53, 62, 52, 24, 1, 0, 0, 0, 0], device='cuda:0')",59000,506839,3,91.89859771728516,60.149044036865234,0.1866106390953064,0.5740920901298523,0.7614333629608154,52305,205407,1435,3343,454534,301432,0,0,0,0,"tensor([121, 121, 143, 195, 247, 296, 341, 412, 472, 452, 108],
21
+ device='cuda:0')","tensor([ 5, 5, 23, 35, 26, 26, 17, 21, 25, 26, 5], device='cuda:0')"
22
+ 5,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_5/,autoencoders/matryoshka_othello/trainer_5/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,119.87999725341795,253.87518310546875,4.644160270690918,0.193115234375,0.9559956192970276,0.9785467982292176,0.9880425930023192,2.054067611694336,2.054514169692993,4.913147449493408,0.9998438358306884,1980,59000,3776000,3,112.27537536621094,65.73728942871094,0.5687968730926514,0.7648139595985413,0.827092170715332,1526112,2375974,63993,61234,2249888,1400026,12981,26555,12864,26466,"tensor([10485, 10572, 10865, 11971, 13729, 15704, 17633, 19524, 19893, 14025,
23
+ 2656], device='cuda:0')","tensor([20, 30, 61, 75, 56, 28, 13, 7, 0, 0, 0], device='cuda:0')",59000,2006000,3,112.27537536621094,65.73728942871094,0.2765778005123138,0.7115323543548584,0.7735953330993652,323095,1117638,7282,17858,1682905,888362,761,3680,761,3680,"tensor([1897, 1911, 2084, 2369, 2709, 3053, 3307, 3420, 3022, 1703, 277],
24
+ device='cuda:0')","tensor([ 3, 13, 44, 59, 44, 17, 2, 0, 0, 0, 0], device='cuda:0')",59000,506839,4,112.27537536621094,47.168827056884766,0.1614082604646682,0.5156950950622559,0.7384083867073059,44572,176960,878,2498,462267,329879,0,0,0,0,"tensor([182, 182, 197, 259, 353, 407, 483, 541, 588, 525, 170],
25
+ device='cuda:0')","tensor([ 7, 7, 13, 35, 40, 37, 27, 35, 33, 34, 10], device='cuda:0')"
26
+ 6,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_6/,autoencoders/matryoshka_othello/trainer_6/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,140.72000122070312,274.9422912597656,4.22182559967041,0.203369140625,0.9637076258659364,0.9823663234710692,0.9910241961479188,2.054067611694336,2.0540716648101807,4.913147449493408,0.9999985694885254,1974,59000,3776000,3,133.19662475585938,67.96151733398438,0.5434357523918152,0.7493254542350769,0.8074802160263062,1432582,2298925,63732,61060,2343418,1477075,10912,22818,10793,22733,"tensor([13091, 13181, 13517, 14881, 16937, 19239, 20924, 22880, 22398, 15971,
27
+ 3176], device='cuda:0')","tensor([15, 26, 55, 66, 52, 32, 13, 8, 0, 0, 0], device='cuda:0')",59000,2006000,3,133.19662475585938,67.96151733398438,0.2415185868740081,0.6883746385574341,0.7433616518974304,276219,1061255,5133,16110,1729781,944745,932,3254,932,3254,"tensor([2063, 2075, 2238, 2513, 2845, 3123, 3280, 3394, 2944, 1743, 315],
28
+ device='cuda:0')","tensor([ 0, 10, 39, 51, 40, 21, 2, 0, 0, 0, 0], device='cuda:0')",59000,506839,4,133.19662475585938,47.18097305297852,0.1341475993394851,0.4419623017311096,0.7062652111053467,36497,144245,796,1664,470342,362594,0,0,0,0,"tensor([194, 194, 208, 273, 370, 468, 520, 565, 609, 546, 195],
29
+ device='cuda:0')","tensor([ 5, 5, 8, 30, 33, 36, 34, 34, 39, 32, 12], device='cuda:0')"
30
+ 7,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_7/,autoencoders/matryoshka_othello/trainer_7/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,162.27999877929688,311.59588623046875,3.958779811859131,0.212158203125,0.9683021903038024,0.9847481846809388,0.9923293590545654,2.054067611694336,2.054280996322632,4.913147449493408,0.9999253749847412,1936,59000,3776000,3,152.7898406982422,66.33949279785156,0.4961225986480713,0.6947320103645325,0.770248293876648,1263364,2043233,53587,62843,2512636,1732767,10094,21246,9993,21180,"tensor([15695, 16124, 17168, 18809, 20825, 22828, 24530, 26155, 24289, 16490,
31
+ 3599], device='cuda:0')","tensor([12, 22, 49, 55, 48, 23, 13, 8, 0, 0, 0], device='cuda:0')",59000,2006000,4,152.7898406982422,45.036746978759766,0.2057108730077743,0.6271664500236511,0.7020303010940552,230426,920538,3864,9008,1775574,1085462,865,1483,865,1483,"tensor([2185, 2209, 2377, 2621, 2870, 3094, 3245, 3287, 2831, 1689, 353],
32
+ device='cuda:0')","tensor([ 0, 8, 35, 42, 37, 12, 2, 0, 0, 0, 0], device='cuda:0')",59000,506839,4,152.7898406982422,45.036746978759766,0.1144734248518943,0.3629881739616394,0.5735284090042114,30806,112710,576,1463,476033,394129,0,0,0,0,"tensor([278, 278, 286, 348, 404, 478, 550, 611, 659, 595, 201],
33
+ device='cuda:0')","tensor([11, 11, 14, 29, 32, 32, 27, 29, 40, 34, 6], device='cuda:0')"
34
+ 8,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_8/,autoencoders/matryoshka_othello/trainer_8/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,184.3600006103516,375.6151428222656,3.6874828338623047,0.210693359375,0.9723998308181764,0.9866716265678406,0.9937899708747864,2.054067611694336,2.0540847778320312,4.913147449493408,0.9999939799308776,1872,59000,3776000,3,173.7223663330078,69.01719665527344,0.4736232161521911,0.612291157245636,0.7087127566337585,1187521,1689490,51102,53094,2588479,2086510,9565,15240,9488,15181,"tensor([17659, 17961, 19289, 20799, 22906, 24894, 26307, 27037, 25040, 17203,
35
+ 3670], device='cuda:0')","tensor([ 8, 18, 32, 37, 35, 16, 12, 9, 0, 0, 0], device='cuda:0')",59000,2006000,4,173.7223663330078,43.89278030395508,0.1867111921310424,0.4751405417919159,0.5755449533462524,206904,627076,3396,6463,1799096,1378924,786,852,786,851,"tensor([2325, 2343, 2494, 2688, 2906, 3064, 3141, 3050, 2645, 1661, 351],
36
+ device='cuda:0')","tensor([ 0, 6, 19, 24, 24, 5, 1, 0, 0, 0, 0], device='cuda:0')",59000,506839,4,173.7223663330078,43.89278030395508,0.1107524931430816,0.3399452567100525,0.5145844221115112,29751,104062,662,1327,477088,402777,0,0,0,0,"tensor([324, 324, 346, 421, 507, 558, 594, 626, 644, 631, 214],
37
+ device='cuda:0')","tensor([15, 15, 17, 33, 40, 38, 34, 36, 40, 31, 9], device='cuda:0')"
38
+ 9,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_9/,autoencoders/matryoshka_othello/trainer_9/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,208.3199920654297,457.0523681640625,3.474344491958618,0.210693359375,0.9758817553520204,0.988358736038208,0.9950173497200012,2.054067611694336,2.054042339324951,4.913147449493408,1.0000088214874268,1789,59000,3776000,3,194.19448852539065,71.17973327636719,0.4525443613529205,0.5791054368019104,0.6658478379249573,1117804,1559077,46282,49355,2658196,2216923,9156,13502,9088,13466,"tensor([18890, 19068, 20286, 22094, 24208, 26400, 27700, 28507, 25513, 17230,
39
+ 4028], device='cuda:0')","tensor([ 8, 14, 25, 32, 27, 12, 10, 9, 0, 0, 0], device='cuda:0')",59000,2006000,3,194.19448852539065,71.17973327636719,0.1671759635210037,0.435017466545105,0.5147745013237,183217,559725,2689,7620,1822783,1446275,730,860,730,860,"tensor([2180, 2188, 2339, 2542, 2727, 2891, 2932, 2903, 2497, 1619, 389],
40
+ device='cuda:0')","tensor([ 0, 2, 13, 20, 17, 2, 0, 0, 0, 0, 0], device='cuda:0')",59000,506839,4,194.19448852539065,42.69601821899414,0.1029135733842849,0.2330352216958999,0.4191470444202423,27523,66996,514,1151,479316,439843,0,0,0,0,"tensor([331, 331, 346, 416, 494, 568, 625, 660, 691, 661, 244],
41
+ device='cuda:0')","tensor([13, 13, 14, 20, 31, 31, 35, 34, 37, 41, 14], device='cuda:0')"
42
+ 10,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_10/,autoencoders/matryoshka_othello/trainer_10/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,228.59999084472656,534.2022705078125,3.2362053394317627,0.203369140625,0.9792519211769104,0.9899451732635498,0.9941858649253844,2.054067611694336,2.0541346073150635,4.913147449493408,0.9999765753746032,1707,59000,3776000,3,214.98684692382807,74.27256774902344,0.4247049391269684,0.5244215130805969,0.6110751628875732,1029604,1358190,42957,45575,2746396,2417810,9552,12230,9491,12201,"tensor([19891, 19995, 21180, 23371, 25260, 26835, 28276, 28982, 24922, 16921,
43
+ 3927], device='cuda:0')","tensor([ 7, 8, 16, 23, 22, 10, 10, 5, 0, 0, 0], device='cuda:0')",59000,2006000,3,214.98684692382807,74.27256774902344,0.1520613431930542,0.3610906898975372,0.4321123361587524,165258,443371,2312,6361,1840742,1562629,732,612,732,612,"tensor([2194, 2198, 2324, 2501, 2634, 2709, 2747, 2757, 2272, 1499, 369],
44
+ device='cuda:0')","tensor([ 0, 0, 8, 14, 13, 1, 1, 0, 0, 0, 0], device='cuda:0')",59000,506839,4,214.98684692382807,43.13955307006836,0.0960441380739212,0.1836436539888382,0.3245460093021393,25590,51334,451,888,481249,455505,0,0,0,0,"tensor([381, 381, 397, 454, 530, 592, 628, 681, 681, 634, 240],
45
+ device='cuda:0')","tensor([17, 17, 17, 20, 31, 32, 33, 40, 46, 44, 11], device='cuda:0')"
46
+ 11,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_11/,autoencoders/matryoshka_othello/trainer_11/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,247.1199951171875,595.2257080078125,3.062748432159424,0.199951171875,0.9814200401306152,0.9909713268280028,0.9953888654708862,2.054067611694336,2.0539865493774414,4.913147449493408,1.000028371810913,1615,59000,3776000,3,235.2872009277344,77.79491424560547,0.4016973674297333,0.4904955625534057,0.5835108757019043,958851,1240953,39146,43044,2817149,2535047,8322,10563,8267,10533,"tensor([21175, 21216, 22265, 24300, 26572, 28573, 29297, 27666, 24001, 17062,
47
+ 4198], device='cuda:0')","tensor([ 5, 7, 15, 21, 19, 8, 7, 5, 0, 0, 0], device='cuda:0')",59000,2006000,3,235.2872009277344,77.79491424560547,0.1351493149995803,0.3390850722789764,0.4126876890659332,145540,410807,2226,6225,1860460,1595193,702,546,702,546,"tensor([2039, 2040, 2170, 2354, 2497, 2652, 2688, 2496, 2131, 1519, 398],
48
+ device='cuda:0')","tensor([ 0, 0, 8, 14, 11, 0, 0, 0, 0, 0, 0], device='cuda:0')",59000,506839,8,235.2872009277344,2.394713878631592,0.0901950523257255,0.1370750665664672,0.2549251914024353,23960,37335,494,564,482879,469504,0,0,0,0,"tensor([397, 397, 422, 478, 565, 631, 674, 684, 711, 654, 245],
49
+ device='cuda:0')","tensor([22, 22, 23, 28, 35, 38, 45, 46, 51, 44, 12], device='cuda:0')"
50
+ 12,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_12/,autoencoders/matryoshka_othello/trainer_12/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,271.5599975585937,654.33203125,2.8832952976226807,0.192138671875,0.9833529591560364,0.9918925166130066,0.9966205954551696,2.054067611694336,2.054128408432007,4.913147449493408,0.9999787211418152,1575,59000,3776000,3,256.135986328125,82.25318908691406,0.3809117376804352,0.468325138092041,0.5484753847122192,896579,1167424,34963,42104,2879421,2608576,8640,10343,8573,10315,"tensor([22183, 22188, 23469, 25735, 27816, 29114, 29468, 27806, 23865, 16459,
51
+ 3946], device='cuda:0')","tensor([ 2, 5, 9, 16, 13, 7, 7, 5, 0, 0, 0], device='cuda:0')",59000,2006000,3,256.135986328125,82.25318908691406,0.1238791719079017,0.2902453243732452,0.3532638251781463,132587,341516,1999,5776,1873413,1664484,791,551,790,551,"tensor([1956, 1956, 2119, 2333, 2468, 2534, 2539, 2347, 2021, 1441, 381],
52
+ device='cuda:0')","tensor([0, 0, 2, 9, 6, 0, 0, 0, 0, 0, 0], device='cuda:0')",59000,506839,8,256.135986328125,2.2150840759277344,0.0899313017725944,0.1264956444501876,0.2340297549962997,23888,34248,523,402,482951,472591,0,0,0,0,"tensor([386, 386, 420, 512, 584, 628, 671, 698, 707, 638, 241],
53
+ device='cuda:0')","tensor([22, 22, 23, 29, 36, 37, 44, 44, 49, 47, 8], device='cuda:0')"
54
+ 13,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_13/,autoencoders/matryoshka_othello/trainer_13/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,289.8399963378906,707.417236328125,2.697723865509033,0.1826171875,0.9854848980903624,0.9929239153862,0.995458483695984,2.054067611694336,2.054515838623047,4.913147449493408,0.9998432397842408,1497,59000,3776000,3,276.1554870605469,88.50450897216797,0.3710063993930816,0.4198465943336487,0.508614718914032,867364,1012249,32372,33746,2908636,2763751,7857,8219,7802,8186,"tensor([22680, 22685, 23319, 25833, 28213, 29363, 29542, 28197, 23753, 16424,
55
+ 4091], device='cuda:0')","tensor([1, 5, 5, 8, 6, 5, 5, 3, 0, 0, 0], device='cuda:0')",59000,2006000,3,276.1554870605469,88.50450897216797,0.117937833070755,0.2163107842206955,0.2852068543434143,125811,243816,1703,4496,1880189,1762184,681,396,681,396,"tensor([1958, 1958, 2052, 2268, 2419, 2487, 2450, 2332, 1996, 1390, 393],
56
+ device='cuda:0')","tensor([0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0], device='cuda:0')",59000,506839,8,276.1554870605469,2.1634888648986816,0.0881554260849952,0.116227276623249,0.2024181932210922,23392,31291,468,315,483447,475548,0,0,0,0,"tensor([397, 397, 433, 532, 599, 630, 685, 713, 738, 625, 243],
57
+ device='cuda:0')","tensor([22, 22, 26, 30, 32, 35, 41, 44, 51, 39, 12], device='cuda:0')"
58
+ 14,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_14/,autoencoders/matryoshka_othello/trainer_14/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,310.8399963378906,764.2993774414062,2.615259885787964,0.169189453125,0.9864574074745178,0.9933949112892152,0.9952017068862916,2.054067611694336,2.0541067123413086,4.913147449493408,0.9999863505363464,1443,59000,3776000,3,297.3014221191406,95.52342224121094,0.3599364757537842,0.3819637298583984,0.483002245426178,835100,900207,29163,37366,2940900,2875793,8070,7924,7996,7895,"tensor([22859, 22872, 23669, 26252, 28502, 30156, 30299, 28702, 24374, 16114,
59
+ 4956], device='cuda:0')","tensor([0, 0, 0, 1, 2, 3, 1, 0, 0, 0, 0], device='cuda:0')",59000,2006000,3,297.3014221191406,95.52342224121094,0.1166601106524467,0.1574596762657165,0.2267763465642929,124371,171669,1823,2813,1881629,1834331,648,440,647,440,"tensor([1895, 1895, 2001, 2234, 2377, 2476, 2468, 2321, 1992, 1351, 469],
60
+ device='cuda:0')","tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], device='cuda:0')",59000,506839,3,297.3014221191406,95.52342224121094,0.0934296026825904,0.096623308956623,0.1705386638641357,24859,25749,446,389,481980,481090,0,0,0,0,"tensor([399, 401, 447, 535, 599, 660, 688, 694, 707, 618, 298],
61
+ device='cuda:0')","tensor([21, 21, 21, 31, 31, 42, 40, 41, 43, 43, 16], device='cuda:0')"
62
+ 15,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_15/,autoencoders/matryoshka_othello/trainer_15/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,333.91998291015625,819.3239135742188,2.465003490447998,0.1611328125,0.9879618287086488,0.9940750002861024,0.995798647403717,2.054067611694336,2.0541460514068604,4.913147449493408,0.9999725818634032,1403,59000,3776000,0,317.4093933105469,317.4093933105469,0.3522833883762359,0.3522833883762359,0.4444047808647156,813362,813362,28295,28295,2962638,2962638,7702,7702,7654,7654,"tensor([23185, 23195, 23981, 26259, 28147, 29593, 30329, 28607, 23925, 16137,
63
+ 3954], device='cuda:0')","tensor([0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0], device='cuda:0')",59000,2006000,2,317.4093933105469,174.72747802734375,0.1087888553738594,0.109014943242073,0.161272719502449,115494,115750,1775,1812,1890506,1890250,701,573,701,573,"tensor([1878, 1883, 1993, 2184, 2318, 2406, 2442, 2297, 1935, 1329, 370],
64
+ device='cuda:0')","tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], device='cuda:0')",59000,506839,3,317.4093933105469,104.05632781982422,0.0844764038920402,0.0918102115392685,0.1465329378843307,22374,24406,497,417,484465,482433,0,0,0,0,"tensor([376, 378, 434, 555, 608, 658, 701, 723, 719, 625, 231],
65
+ device='cuda:0')","tensor([20, 21, 23, 32, 41, 42, 43, 43, 48, 42, 11], device='cuda:0')"
66
+ 16,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_16/,autoencoders/matryoshka_othello/trainer_16/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,354.5599975585937,872.7642822265625,2.2573609352111816,0.153564453125,0.9898377060890198,0.9950308799743652,0.9959880113601683,2.054067611694336,2.05419659614563,4.913147449493408,0.999954879283905,1357,59000,3776000,0,338.1923828125,338.1923828125,0.3401058912277221,0.3401058912277221,0.4206508994102478,779105,779105,26439,26439,2996895,2996895,6872,6872,6815,6815,"tensor([23346, 23359, 23800, 26144, 27963, 29101, 28679, 26974, 22132, 15491,
67
+ 4451], device='cuda:0')","tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], device='cuda:0')",59000,2006000,1,338.1923828125,290.7479553222656,0.0970010161399841,0.0970679447054863,0.1291353106498718,102337,102411,1682,1678,1903663,1903589,570,569,569,568,"tensor([1883, 1885, 1979, 2201, 2304, 2344, 2263, 2104, 1750, 1273, 426],
68
+ device='cuda:0')","tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], device='cuda:0')",59000,506839,3,338.1923828125,112.71033477783205,0.0754797160625457,0.0850850120186805,0.1251789331436157,19898,22536,504,354,486941,484303,0,0,0,0,"tensor([393, 395, 440, 560, 631, 674, 670, 676, 677, 607, 277],
69
+ device='cuda:0')","tensor([24, 24, 25, 35, 36, 39, 43, 47, 44, 38, 13], device='cuda:0')"
70
+ 17,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_17/,autoencoders/matryoshka_othello/trainer_17/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,372.8800048828125,926.5501098632812,2.116051197052002,0.1455078125,0.991376519203186,0.9956154227256776,0.9965057969093324,2.054067611694336,2.0541765689849854,4.913147449493408,0.9999619126319884,1320,59000,3776000,0,358.1425170898437,358.1425170898437,0.3324368596076965,0.3324368596076965,0.4063954651355743,757789,757789,25206,25206,3018211,3018211,6385,6385,6334,6334,"tensor([24675, 24682, 25349, 27123, 28309, 28956, 27821, 25905, 22054, 15903,
71
+ 4849], device='cuda:0')","tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], device='cuda:0')",59000,2006000,1,358.1425170898437,309.5303039550781,0.0888127610087394,0.0889149531722068,0.1146357059478759,93286,93398,1448,1441,1912714,1912602,480,479,480,479,"tensor([1862, 1865, 1998, 2185, 2256, 2289, 2190, 1989, 1725, 1286, 464],
72
+ device='cuda:0')","tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], device='cuda:0')",59000,506839,3,358.1425170898437,120.12174224853516,0.0716997906565666,0.080491118133068,0.1164091974496841,18864,21267,491,325,487975,485572,0,0,0,0,"tensor([404, 404, 491, 581, 655, 704, 700, 696, 716, 634, 287],
73
+ device='cuda:0')","tensor([20, 20, 28, 36, 40, 43, 40, 45, 44, 40, 12], device='cuda:0')"
74
+ 18,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_18/,autoencoders/matryoshka_othello/trainer_18/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,393.3599853515625,977.3129272460938,1.862903356552124,0.138427734375,0.9928845167160034,0.9965375065803528,0.9965564012527466,2.054067611694336,2.054072141647339,4.913147449493408,0.9999983906745912,1245,59000,3776000,0,379.510009765625,379.510009765625,0.3191245496273041,0.3191245496273041,0.3881958425045013,721154,721154,22423,22423,3054846,3054846,5616,5616,5563,5563,"tensor([23867, 23885, 24359, 25717, 27450, 27677, 27217, 24588, 19811, 14830,
75
+ 5344], device='cuda:0')","tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], device='cuda:0')",59000,2006000,1,379.510009765625,330.9790344238281,0.0792772620916366,0.0793118998408317,0.0998347327113151,82855,82892,1404,1387,1923145,1923108,437,434,437,434,"tensor([1767, 1771, 1874, 2008, 2153, 2137, 2108, 1885, 1537, 1192, 498],
76
+ device='cuda:0')","tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], device='cuda:0')",59000,506839,4,379.510009765625,69.05247497558594,0.0709888860583305,0.076069638133049,0.1071947440505027,18671,20048,516,209,488168,486791,0,0,0,0,"tensor([428, 430, 506, 581, 667, 689, 705, 694, 630, 585, 314],
77
+ device='cuda:0')","tensor([27, 27, 29, 40, 43, 46, 48, 49, 40, 38, 17], device='cuda:0')"
78
+ 19,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_19/,autoencoders/matryoshka_othello/trainer_19/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,414.91998291015625,1019.5823364257812,1.693614482879639,0.13623046875,0.9943257570266724,0.9971392750740052,0.9965102076530457,2.054067611694336,2.054236650466919,4.913147449493408,0.9999408721923828,1189,59000,3776000,0,400.29974365234375,400.29974365234375,0.2922493517398834,0.2922493517398834,0.3592918217182159,649253,649253,17891,17891,3126747,3126747,3816,3816,3793,3793,"tensor([23418, 23460, 23478, 24578, 25433, 25167, 23335, 20836, 17225, 13387,
79
+ 4693], device='cuda:0')","tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], device='cuda:0')",59000,2006000,0,400.29974365234375,400.29974365234375,0.0623162426054477,0.0623162426054477,0.0808260515332222,64552,64552,1203,1203,1941448,1941448,333,333,333,333,"tensor([1598, 1606, 1680, 1810, 1907, 1899, 1773, 1593, 1326, 1064, 440],
80
+ device='cuda:0')","tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], device='cuda:0')",59000,506839,4,400.29974365234375,75.95011138916016,0.0555796213448047,0.0659169182181358,0.0925396382808685,14500,17282,435,236,492339,489557,0,0,0,0,"tensor([387, 392, 451, 551, 630, 642, 649, 645, 612, 538, 288],
81
+ device='cuda:0')","tensor([25, 26, 27, 35, 41, 43, 42, 41, 40, 36, 15], device='cuda:0')"
82
+ 20,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_20/,autoencoders/matryoshka_othello/trainer_20/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,9.4399995803833,51.98342514038086,12.217387199401855,0.0245361328125,0.6901394128799438,0.8368940353393555,0.849263608455658,2.054067611694336,2.5678534507751465,4.913147449493408,0.8202967643737793,2534,59000,3776000,0,9.985015869140623,9.985015869140623,0.6314403414726257,0.6314403414726257,0.6913968324661255,1761268,1761268,41305,41305,2014732,2014732,13177,13177,13095,13095,"tensor([19011, 19011, 19011, 19017, 19067, 19434, 20006, 20472, 20391, 15369,
83
+ 2182], device='cuda:0')","tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], device='cuda:0')",59000,2006000,0,9.985015869140623,9.985015869140623,0.5106327533721924,0.5106327533721924,0.5424720048904419,691565,691565,11094,11094,1314435,1314435,3143,3143,3140,3140,"tensor([6277, 6277, 6277, 6277, 6292, 6382, 6465, 6480, 5819, 3145, 213],
84
+ device='cuda:0')","tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], device='cuda:0')",59000,506839,0,9.985015869140623,9.985015869140623,0.5258867740631104,0.5258867740631104,0.605864942073822,181549,181549,2061,2061,325290,325290,0,0,0,0,"tensor([590, 590, 590, 590, 595, 619, 688, 784, 860, 742, 118],
85
+ device='cuda:0')","tensor([32, 32, 32, 32, 33, 35, 36, 34, 34, 25, 6], device='cuda:0')"
86
+ 21,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_21/,autoencoders/matryoshka_othello/trainer_21/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,30.35999870300293,112.67391204833984,8.406524658203125,0.048095703125,0.8532488942146301,0.9266902208328248,0.9364179968833924,2.054067611694336,2.0598807334899902,4.913147449493408,0.997966766357422,2351,59000,3776000,0,29.9969482421875,29.9969482421875,0.7910191416740417,0.7910191416740417,0.8393763899803162,2507967,2507967,57136,57136,1268033,1268033,27002,27002,26744,26744,"tensor([15944, 15944, 15961, 16041, 16361, 17312, 18383, 19606, 19829, 15951,
87
+ 3288], device='cuda:0')","tensor([12, 12, 12, 12, 12, 11, 9, 4, 0, 0, 0], device='cuda:0')",59000,2006000,3,29.9969482421875,28.96919059753418,0.7310500144958496,0.7317075133323669,0.7711263298988342,1170955,1170450,26533,22779,835045,835550,8995,6968,8966,6950,"tensor([4046, 4046, 4051, 4102, 4279, 4608, 4913, 4982, 4449, 2518, 300],
88
+ device='cuda:0')","tensor([1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0], device='cuda:0')",59000,506839,0,29.9969482421875,29.9969482421875,0.6568450331687927,0.6568450331687927,0.8117663860321045,250144,250144,4670,4670,256695,256695,0,0,0,0,"tensor([304, 304, 305, 313, 336, 408, 471, 560, 624, 571, 189],
89
+ device='cuda:0')","tensor([60, 60, 60, 57, 51, 43, 30, 30, 37, 30, 7], device='cuda:0')"
90
+ 22,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_22/,autoencoders/matryoshka_othello/trainer_22/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,52.39999771118164,159.36683654785156,6.458910942077637,0.066650390625,0.913706123828888,0.9574987292289734,0.9641538858413696,2.054067611694336,2.0557403564453125,4.913147449493408,0.9994149208068848,2054,59000,3776000,3,51.00447845458984,44.02774429321289,0.8091054558753967,0.824268102645874,0.8692331314086914,2628443,2684743,92715,53504,1147557,1091257,45226,27252,44553,27036,"tensor([13967, 14131, 14287, 14664, 15669, 16894, 18398, 19805, 20386, 16492,
91
+ 3783], device='cuda:0')","tensor([35, 35, 36, 35, 29, 22, 11, 6, 0, 0, 0], device='cuda:0')",59000,2006000,3,51.00447845458984,44.02774429321289,0.7306276559829712,0.7722402215003967,0.8160722851753235,1178985,1274433,42336,20180,827015,731567,12846,5641,12823,5638,"tensor([2702, 2706, 2750, 2910, 3282, 3650, 3917, 4058, 3660, 2127, 350],
92
+ device='cuda:0')","tensor([22, 22, 23, 23, 17, 10, 1, 0, 0, 0, 0], device='cuda:0')",59000,506839,3,51.00447845458984,44.02774429321289,0.3162685334682464,0.6373100876808167,0.8312321305274963,95759,238643,2957,3425,411080,268196,0,0,0,0,"tensor([217, 217, 245, 279, 345, 419, 472, 525, 579, 579, 205],
93
+ device='cuda:0')","tensor([29, 29, 52, 56, 46, 40, 32, 32, 34, 35, 6], device='cuda:0')"
94
+ 23,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_23/,autoencoders/matryoshka_othello/trainer_23/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,72.68000030517578,193.06866455078125,5.606268882751465,0.0811767578125,0.9344375729560852,0.9678757190704346,0.9762732982635498,2.054067611694336,2.0542540550231934,4.913147449493408,0.9999347925186156,2258,59000,3776000,2,70.83123779296875,62.49774932861328,0.7580947279930115,0.828872561454773,0.8606619238853455,2369731,2727046,106076,77087,1406269,1048954,45708,43297,44707,42810,"tensor([12203, 12737, 12980, 13806, 15616, 17453, 19373, 20702, 21308, 16758,
95
+ 3714], device='cuda:0')","tensor([50, 51, 76, 68, 56, 32, 12, 9, 0, 0, 0], device='cuda:0')",59000,2006000,3,70.83123779296875,53.771141052246094,0.6456669569015503,0.7893280982971191,0.8253338932991028,974786,1320746,38684,19761,1031214,685254,5816,5407,5809,5401,"tensor([2716, 2734, 2857, 3108, 3519, 3916, 4240, 4294, 3799, 2101, 333],
96
+ device='cuda:0')","tensor([35, 36, 61, 55, 45, 21, 2, 0, 0, 0, 0], device='cuda:0')",59000,506839,4,70.83123779296875,42.93726348876953,0.216208353638649,0.6067290306091309,0.8428568243980408,61653,222061,1819,3094,445186,284778,0,0,0,0,"tensor([187, 189, 220, 259, 325, 392, 440, 504, 555, 528, 207],
97
+ device='cuda:0')","tensor([ 9, 11, 35, 40, 42, 35, 25, 25, 33, 30, 5], device='cuda:0')"
98
+ 24,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_24/,autoencoders/matryoshka_othello/trainer_24/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,95.43999481201172,224.60096740722656,4.996743202209473,0.092529296875,0.948758602142334,0.9749541282653807,0.9810227751731871,2.054067611694336,2.0541107654571533,4.913147449493408,0.9999849200248718,2464,59000,3776000,2,91.94552612304688,73.98579406738281,0.6708088517189026,0.8252156972885132,0.8582485318183899,1948274,2711082,84457,83520,1827726,1064918,25955,44681,25527,44307,"tensor([13021, 14037, 14588, 16105, 18137, 20550, 22702, 24495, 24558, 17468,
99
+ 2709], device='cuda:0')","tensor([28, 45, 83, 85, 76, 33, 16, 7, 1, 0, 0], device='cuda:0')",59000,2006000,3,91.94552612304688,60.38770294189453,0.4244105517864227,0.7780733108520508,0.8112568259239197,545685,1289618,19811,19283,1460315,716382,1859,4590,1848,4584,"tensor([2738, 2792, 2985, 3335, 3821, 4283, 4543, 4516, 3893, 2037, 255],
100
+ device='cuda:0')","tensor([10, 27, 65, 69, 62, 21, 4, 0, 0, 0, 0], device='cuda:0')",59000,506839,3,91.94552612304688,60.38770294189453,0.1809167861938476,0.6104936003684998,0.8296582102775574,50541,223897,1341,2759,456298,282942,0,0,0,0,"tensor([214, 215, 249, 309, 391, 464, 520, 555, 612, 530, 156],
101
+ device='cuda:0')","tensor([11, 12, 37, 48, 48, 39, 33, 31, 40, 33, 10], device='cuda:0')"
102
+ 25,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_25/,autoencoders/matryoshka_othello/trainer_25/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,116.5999984741211,244.7445831298828,4.460059642791748,0.1029052734375,0.959475874900818,0.9802156090736388,0.9865991473197936,2.054067611694336,2.054595470428467,4.913147449493408,0.9998153448104858,2541,59000,3776000,3,111.96609497070312,65.09745025634766,0.6221175193786621,0.8121830821037292,0.852545440196991,1738858,2632623,75269,74208,2037142,1143377,17771,34205,17550,33956,"tensor([15660, 16424, 17057, 19061, 21664, 24307, 26840, 28627, 27461, 19125,
103
+ 3685], device='cuda:0')","tensor([22, 42, 83, 87, 75, 39, 14, 9, 0, 0, 0], device='cuda:0')",59000,2006000,3,111.96609497070312,65.09745025634766,0.3284350335597992,0.7588121294975281,0.8015691041946411,395935,1236696,9105,16862,1610065,769304,1556,4006,1551,3999,"tensor([2983, 3033, 3242, 3607, 4045, 4404, 4691, 4673, 3825, 2083, 339],
104
+ device='cuda:0')","tensor([ 2, 22, 63, 67, 62, 27, 2, 0, 0, 0, 0], device='cuda:0')",59000,506839,3,111.96609497070312,65.09745025634766,0.1843328028917312,0.6067309379577637,0.818635880947113,51593,221937,1349,2807,455246,284902,0,0,0,0,"tensor([216, 216, 242, 327, 407, 497, 563, 616, 655, 579, 214],
105
+ device='cuda:0')","tensor([13, 13, 33, 50, 51, 43, 36, 43, 46, 33, 13], device='cuda:0')"
106
+ 26,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_26/,autoencoders/matryoshka_othello/trainer_26/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,138.63999938964844,258.3094177246094,4.0212082862854,0.108642578125,0.967160940170288,0.984063982963562,0.9921851754188538,2.054067611694336,2.053964614868164,4.913147449493408,1.0000360012054443,2578,59000,3776000,3,133.0915985107422,67.77018737792969,0.582086980342865,0.7888503670692444,0.8299218416213989,1583871,2505379,82171,70596,2192129,1270621,16531,29944,16323,29732,"tensor([20378, 20940, 21590, 23482, 26240, 29399, 31594, 32759, 30215, 20961,
107
+ 4499], device='cuda:0')","tensor([16, 38, 72, 78, 71, 32, 14, 9, 0, 0, 0], device='cuda:0')",59000,2006000,3,133.0915985107422,67.77018737792969,0.2763184010982513,0.7318984270095825,0.77180415391922,322703,1166802,7030,15624,1683297,839198,1353,3746,1352,3746,"tensor([3414, 3455, 3645, 3960, 4357, 4691, 4848, 4702, 3894, 2148, 432],
108
+ device='cuda:0')","tensor([ 0, 20, 54, 61, 58, 20, 2, 0, 0, 0, 0], device='cuda:0')",59000,506839,3,133.0915985107422,67.77018737792969,0.1477232277393341,0.6215002536773682,0.8165282607078552,40493,230016,896,3341,466346,276823,0,0,0,0,"tensor([279, 279, 309, 390, 496, 558, 642, 684, 685, 654, 261],
109
+ device='cuda:0')","tensor([18, 18, 35, 57, 56, 44, 46, 47, 49, 38, 9], device='cuda:0')"
110
+ 27,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_27/,autoencoders/matryoshka_othello/trainer_27/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,161.83999633789062,279.06756591796875,3.6138100624084473,0.112060546875,0.9735986590385436,0.987299382686615,0.995215117931366,2.054067611694336,2.0541508197784424,4.913147449493408,0.9999709129333496,2583,59000,3776000,3,153.3097686767578,69.95897674560547,0.5635640025138855,0.7740734219551086,0.8082035183906555,1515148,2428110,85874,69481,2260852,1347890,17697,24037,17499,23925,"tensor([24241, 24644, 25319, 27555, 30263, 32978, 34458, 35179, 31292, 20616,
111
+ 3889], device='cuda:0')","tensor([18, 39, 64, 75, 66, 29, 11, 9, 0, 0, 0], device='cuda:0')",59000,2006000,3,153.3097686767578,69.95897674560547,0.2520097494125366,0.696379542350769,0.7297086119651794,290054,1079438,5873,14705,1715946,926562,1286,2794,1286,2794,"tensor([3679, 3710, 3867, 4171, 4520, 4730, 4773, 4540, 3603, 1954, 371],
112
+ device='cuda:0')","tensor([ 0, 19, 44, 55, 49, 17, 0, 0, 0, 0, 0], device='cuda:0')",59000,506839,3,153.3097686767578,69.95897674560547,0.1175246909260749,0.5646802186965942,0.7941409945487976,31692,200359,794,2439,475147,306480,0,0,0,0,"tensor([312, 312, 342, 434, 544, 628, 666, 704, 697, 654, 224],
113
+ device='cuda:0')","tensor([18, 18, 32, 58, 60, 51, 43, 45, 49, 42, 11], device='cuda:0')"
114
+ 28,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_28/,autoencoders/matryoshka_othello/trainer_28/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,185.0800018310547,306.6699523925781,3.2574055194854736,0.1126708984375,0.9785327315330504,0.989672064781189,0.9954099655151368,2.054067611694336,2.054171323776245,4.913147449493408,0.9999637007713318,2531,59000,3776000,3,174.19786071777344,71.6718521118164,0.5137755274772644,0.7410517930984497,0.7912662029266357,1330198,2259125,71931,61952,2445802,1516875,17174,21589,16998,21468,"tensor([28909, 29365, 29895, 31931, 34349, 36870, 38575, 37537, 32839, 20417,
115
+ 3762], device='cuda:0')","tensor([13, 32, 58, 67, 61, 30, 11, 9, 0, 0, 0], device='cuda:0')",59000,2006000,3,174.19786071777344,71.6718521118164,0.2193938344717025,0.6658247113227844,0.7183109521865845,247718,1007990,4486,13804,1758282,998010,1432,2358,1430,2355,"tensor([3965, 3997, 4134, 4373, 4592, 4704, 4728, 4249, 3431, 1872, 363],
116
+ device='cuda:0')","tensor([ 0, 15, 41, 51, 46, 18, 1, 0, 0, 0, 0], device='cuda:0')",59000,506839,4,174.19786071777344,49.01318359375,0.1199695467948913,0.4855944812297821,0.7241165637969971,32394,163075,804,1737,474445,343764,0,0,0,0,"tensor([410, 411, 439, 551, 643, 718, 761, 751, 741, 654, 227],
117
+ device='cuda:0')","tensor([26, 26, 33, 57, 64, 57, 51, 52, 59, 40, 10], device='cuda:0')"
118
+ 29,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_29/,autoencoders/matryoshka_othello/trainer_29/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,204.44000244140625,347.7501525878906,3.0032670497894287,0.1124267578125,0.9817346930503844,0.9912129044532776,0.9955698847770692,2.054067611694336,2.0540127754211426,4.913147449493408,1.0000191926956177,2432,59000,3776000,3,193.9471435546875,77.63337707519531,0.489005297422409,0.7223190665245056,0.7826229333877563,1242551,2168114,63402,59089,2533449,1607886,15325,20187,15187,20108,"tensor([31061, 31256, 31914, 33719, 36101, 37334, 38045, 35688, 30790, 19004,
119
+ 3806], device='cuda:0')","tensor([12, 25, 54, 61, 58, 29, 11, 8, 0, 0, 0], device='cuda:0')",59000,2006000,3,193.9471435546875,77.63337707519531,0.1965099275112152,0.6132179498672485,0.6888105869293213,218983,892398,3739,12143,1787017,1113602,1290,1689,1288,1689,"tensor([3814, 3828, 3953, 4151, 4334, 4337, 4264, 3840, 3038, 1749, 380],
120
+ device='cuda:0')","tensor([ 0, 8, 36, 43, 43, 17, 0, 0, 0, 0, 0], device='cuda:0')",59000,506839,3,193.9471435546875,77.63337707519531,0.1104515120387077,0.5793190598487854,0.7723321318626404,29667,207515,689,2056,477172,299324,0,0,0,0,"tensor([428, 430, 466, 561, 651, 697, 736, 764, 792, 694, 234],
121
+ device='cuda:0')","tensor([25, 25, 45, 67, 68, 56, 49, 50, 58, 49, 10], device='cuda:0')"
122
+ 30,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_30/,autoencoders/matryoshka_othello/trainer_30/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,225.63999938964844,411.7349853515625,2.7807531356811523,0.1083984375,0.9844720363616944,0.9925510287284852,0.996640920639038,2.054067611694336,2.0539581775665283,4.913147449493408,1.0000382661819458,2352,59000,3776000,3,214.83485412597656,81.65939331054688,0.4573381841182709,0.6634397506713867,0.7297976613044739,1134490,1900880,50784,53497,2641510,1875120,13735,16406,13632,16341,"tensor([32980, 33076, 33740, 35791, 38118, 39414, 38701, 36510, 30303, 18820,
123
+ 4092], device='cuda:0')","tensor([ 7, 18, 38, 44, 47, 22, 10, 6, 0, 0, 0], device='cuda:0')",59000,2006000,3,214.83485412597656,81.65939331054688,0.1744454503059387,0.5080719590187073,0.5910968780517578,191980,686458,3052,9750,1814020,1319542,1161,1218,1160,1218,"tensor([3622, 3628, 3723, 3955, 4091, 4120, 3905, 3507, 2815, 1689, 398],
124
+ device='cuda:0')","tensor([ 0, 3, 23, 27, 32, 10, 1, 0, 0, 0, 0], device='cuda:0')",59000,506839,3,214.83485412597656,81.65939331054688,0.1148595809936523,0.5063759088516235,0.7218447327613831,30926,172403,736,1687,475913,334436,0,0,0,0,"tensor([438, 438, 483, 586, 665, 739, 767, 775, 790, 682, 249],
125
+ device='cuda:0')","tensor([26, 26, 42, 56, 61, 52, 47, 51, 53, 47, 16], device='cuda:0')"
126
+ 31,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_31/,autoencoders/matryoshka_othello/trainer_31/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,247.4399871826172,496.8312377929688,2.689258337020874,0.1053466796875,0.9855172634124756,0.993039071559906,0.9965763688087464,2.054067611694336,2.054001569747925,4.913147449493408,1.0000231266021729,2222,59000,3776000,3,234.8309326171875,86.22757720947266,0.4350329935550689,0.5969975590705872,0.6699404120445251,1062182,1631168,45043,57404,2713818,2144832,15285,18594,15150,18410,"tensor([33500, 33565, 34734, 37461, 39624, 40741, 40161, 37018, 29941, 19445,
127
+ 4779], device='cuda:0')","tensor([ 3, 12, 28, 33, 32, 15, 10, 7, 0, 0, 0], device='cuda:0')",59000,2006000,3,234.8309326171875,86.22757720947266,0.1616047173738479,0.4350122511386871,0.5045793652534485,176571,559565,2650,7075,1829429,1446435,1163,932,1162,931,"tensor([3393, 3396, 3522, 3711, 3838, 3795, 3653, 3248, 2545, 1656, 461],
128
+ device='cuda:0')","tensor([ 0, 1, 17, 22, 22, 5, 1, 0, 0, 0, 0], device='cuda:0')",59000,506839,3,234.8309326171875,86.22757720947266,0.1168856918811798,0.4074470400810241,0.5992284417152405,31508,129975,778,1183,475331,376864,0,0,0,0,"tensor([485, 486, 536, 630, 723, 761, 787, 790, 752, 683, 288],
129
+ device='cuda:0')","tensor([27, 27, 38, 51, 53, 46, 46, 50, 56, 44, 14], device='cuda:0')"
130
+ 32,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_32/,autoencoders/matryoshka_othello/trainer_32/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,268.8399963378906,569.4232177734375,2.5443568229675293,0.101318359375,0.9869800209999084,0.993770956993103,0.99682354927063,2.054067611694336,2.054102659225464,4.913147449493408,0.9999877214431764,2174,59000,3776000,3,255.8010406494141,87.0902099609375,0.4180728793144226,0.5635177493095398,0.6338106393814087,1009308,1502847,43075,54958,2766692,2273153,14226,17305,14111,17139,"tensor([36377, 36657, 39200, 41770, 43771, 43889, 42335, 37968, 30952, 19981,
131
+ 5762], device='cuda:0')","tensor([ 2, 13, 23, 28, 24, 15, 10, 6, 0, 0, 0], device='cuda:0')",59000,2006000,3,255.8010406494141,87.0902099609375,0.1459967046976089,0.3664756417274475,0.4384575486183166,158168,451633,2566,7104,1847832,1554367,1190,910,1190,910,"tensor([3356, 3368, 3531, 3710, 3837, 3827, 3595, 3103, 2542, 1637, 538],
132
+ device='cuda:0')","tensor([ 0, 1, 11, 16, 13, 4, 1, 0, 0, 0, 0], device='cuda:0')",59000,506839,3,255.8010406494141,87.0902099609375,0.119688369333744,0.3651107251644134,0.5337852239608765,32307,113439,706,1117,474532,393400,0,0,0,0,"tensor([542, 542, 579, 669, 756, 785, 803, 777, 759, 684, 340],
133
+ device='cuda:0')","tensor([32, 32, 42, 56, 60, 57, 49, 47, 52, 47, 20], device='cuda:0')"
134
+ 33,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_33/,autoencoders/matryoshka_othello/trainer_33/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,289.67999267578125,641.2380981445312,2.411230087280273,0.0968017578125,0.9882875084877014,0.9943447113037108,0.997796595096588,2.054067611694336,2.054039001464844,4.913147449493408,1.0000100135803225,2085,59000,3776000,3,275.8831481933594,87.74876403808594,0.4057557284832001,0.5095349550247192,0.6034495830535889,970777,1310171,38255,56444,2805223,2465829,13931,16743,13828,16627,"tensor([36771, 36845, 39093, 42178, 44260, 44391, 42811, 39353, 32116, 21726,
135
+ 5251], device='cuda:0')","tensor([ 2, 9, 12, 19, 17, 9, 8, 5, 0, 0, 0], device='cuda:0')",59000,2006000,3,275.8831481933594,87.74876403808594,0.1384316086769104,0.3176446855068207,0.4112668633460998,149354,380160,2448,7458,1856646,1625840,1118,703,1118,703,"tensor([3187, 3191, 3322, 3555, 3671, 3646, 3404, 3108, 2505, 1749, 487],
136
+ device='cuda:0')","tensor([ 0, 1, 4, 11, 9, 1, 0, 0, 0, 0, 0], device='cuda:0')",59000,506839,3,275.8831481933594,87.74876403808594,0.1195691525936126,0.2655493319034576,0.4428947567939758,32275,77732,741,872,474564,429107,0,0,0,0,"tensor([531, 531, 571, 675, 740, 798, 788, 810, 786, 748, 296],
137
+ device='cuda:0')","tensor([24, 24, 31, 45, 48, 48, 45, 49, 49, 44, 12], device='cuda:0')"
138
+ 34,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_34/,autoencoders/matryoshka_othello/trainer_34/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,309.8800048828125,718.7659912109375,2.330029010772705,0.090576171875,0.989105463027954,0.9947177171707152,0.9972371459007264,2.054067611694336,2.054044246673584,4.913147449493408,1.000008225440979,2058,59000,3776000,3,296.8710327148437,90.3069076538086,0.3926414549350738,0.4576225578784942,0.5406661629676819,931416,1135440,36943,50903,2844584,2640560,14608,14773,14510,14690,"tensor([38533, 38596, 41402, 44204, 45972, 45686, 44161, 39382, 31650, 20789,
139
+ 6262], device='cuda:0')","tensor([ 1, 7, 10, 12, 10, 9, 8, 5, 0, 0, 0], device='cuda:0')",59000,2006000,3,296.8710327148437,90.3069076538086,0.1294526606798172,0.2134841382503509,0.2938487529754638,138982,240254,2243,4536,1867018,1765746,979,763,979,763,"tensor([3212, 3216, 3399, 3614, 3702, 3635, 3463, 3008, 2443, 1652, 574],
140
+ device='cuda:0')","tensor([0, 0, 2, 4, 2, 1, 0, 0, 0, 0, 0], device='cuda:0')",59000,506839,4,296.8710327148437,51.71282577514648,0.1152388527989387,0.2257803678512573,0.3751693665981293,31031,64604,681,830,475808,442235,0,0,0,0,"tensor([555, 555, 610, 717, 803, 820, 831, 810, 769, 728, 352],
141
+ device='cuda:0')","tensor([23, 23, 30, 40, 49, 45, 44, 43, 47, 46, 16], device='cuda:0')"
142
+ 35,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_35/,autoencoders/matryoshka_othello/trainer_35/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,332.2799987792969,787.7266235351562,2.213133811950684,0.0870361328125,0.990141212940216,0.995234191417694,0.9970736503601074,2.054067611694336,2.0539331436157227,4.913147449493408,1.0000470876693726,1999,59000,3776000,3,317.0438232421875,94.90868377685548,0.3822266459465027,0.4202511608600616,0.5066519975662231,900272,1017162,34399,47572,2875728,2758838,14283,14629,14182,14550,"tensor([38037, 38083, 40435, 43225, 45071, 45030, 43407, 39202, 31530, 21949,
143
+ 5242], device='cuda:0')","tensor([ 0, 4, 8, 10, 9, 8, 7, 3, 0, 0, 0], device='cuda:0')",59000,2006000,4,317.0438232421875,53.44825744628906,0.1275084465742111,0.1668349057435989,0.2326089739799499,136753,182795,2250,2533,1869247,1823205,1059,459,1059,459,"tensor([3105, 3105, 3277, 3447, 3581, 3516, 3308, 2973, 2378, 1705, 488],
144
+ device='cuda:0')","tensor([0, 0, 1, 3, 2, 0, 0, 0, 0, 0, 0], device='cuda:0')",59000,506839,5,317.0438232421875,26.52299118041992,0.1143601387739181,0.1528678238391876,0.2642676532268524,30777,41996,631,607,476062,464843,0,0,0,0,"tensor([564, 564, 619, 687, 766, 823, 823, 811, 757, 728, 294],
145
+ device='cuda:0')","tensor([27, 27, 28, 35, 41, 49, 49, 47, 47, 46, 13], device='cuda:0')"
146
+ 36,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_36/,autoencoders/matryoshka_othello/trainer_36/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,352.44000244140625,850.2285766601562,2.103053569793701,0.0811767578125,0.990962564945221,0.9956517815589904,0.9964840412139891,2.054067611694336,2.0540781021118164,4.913147449493408,0.999996304512024,1915,59000,3776000,2,338.1718139648437,163.6559600830078,0.3687447607517242,0.383554995059967,0.4691860675811767,860713,906831,31627,45727,2915287,2869169,12905,13512,12814,13419,"tensor([37508, 37558, 39758, 43390, 43862, 43685, 41784, 37089, 29945, 21116,
147
+ 6062], device='cuda:0')","tensor([0, 1, 3, 3, 4, 3, 2, 1, 0, 0, 0], device='cuda:0')",59000,2006000,2,338.1718139648437,163.6559600830078,0.117171935737133,0.1328587532043457,0.1870912760496139,124973,142962,2183,3128,1881027,1863038,1029,945,1029,945,"tensor([2990, 2996, 3173, 3398, 3404, 3328, 3130, 2728, 2211, 1636, 554],
148
+ device='cuda:0')","tensor([0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0], device='cuda:0')",59000,506839,3,338.1718139648437,98.81194305419922,0.1066550239920616,0.1364925801753997,0.2239283621311187,28594,37167,763,595,478245,469672,0,0,0,0,"tensor([541, 545, 609, 701, 719, 782, 786, 734, 708, 691, 342],
149
+ device='cuda:0')","tensor([27, 27, 33, 36, 37, 41, 42, 39, 38, 39, 14], device='cuda:0')"
150
+ 37,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_37/,autoencoders/matryoshka_othello/trainer_37/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,371.1600036621094,906.1168823242188,1.9735631942749023,0.07568359375,0.9919406175613404,0.996134877204895,0.996514856815338,2.054067611694336,2.053974866867065,4.913147449493408,1.0000324249267578,1845,59000,3776000,0,358.4033203125,358.4033203125,0.364502489566803,0.364502489566803,0.4422221779823303,849796,849796,36976,36976,2926204,2926204,11933,11933,11823,11823,"tensor([37811, 37898, 39441, 42398, 44228, 43333, 41059, 36433, 28519, 20098,
151
+ 6077], device='cuda:0')","tensor([0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0], device='cuda:0')",59000,2006000,1,358.4033203125,281.87481689453125,0.113246351480484,0.1134063974022865,0.1583442091941833,120553,120732,2487,2460,1885447,1885268,1068,1059,1068,1059,"tensor([2932, 2944, 3086, 3284, 3380, 3229, 3039, 2628, 2039, 1528, 545],
152
+ device='cuda:0')","tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], device='cuda:0')",59000,506839,3,358.4033203125,105.75222778320312,0.1028723716735839,0.1351223289966583,0.1971773654222488,27518,36788,636,887,479321,470051,0,0,0,0,"tensor([519, 520, 565, 684, 769, 777, 783, 754, 701, 670, 336],
153
+ device='cuda:0')","tensor([19, 19, 24, 30, 39, 42, 41, 40, 35, 35, 8], device='cuda:0')"
154
+ 38,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_38/,autoencoders/matryoshka_othello/trainer_38/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,393.03997802734375,962.9037475585938,1.7561087608337402,0.0706787109375,0.9935108423233032,0.9968559741973876,0.9963199496269226,2.054067611694336,2.054198980331421,4.913147449493408,0.9999540448188782,1723,59000,3776000,1,379.14739990234375,304.4222412109375,0.3463058769702911,0.3465242981910705,0.4133483469486236,796370,797019,26859,27057,2979630,2978981,10397,10696,10299,10599,"tensor([37390, 37453, 38283, 40984, 42499, 41523, 39066, 34095, 28026, 20106,
155
+ 5529], device='cuda:0')","tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], device='cuda:0')",59000,2006000,0,379.14739990234375,379.14739990234375,0.1015911921858787,0.1015911921858787,0.1330360323190689,107478,107478,2414,2414,1898522,1898522,1103,1103,1103,1103,"tensor([2726, 2735, 2883, 3071, 3136, 3016, 2831, 2452, 2061, 1550, 504],
156
+ device='cuda:0')","tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], device='cuda:0')",59000,506839,2,379.14739990234375,190.4300842285156,0.0943196415901184,0.0998932942748069,0.1562338024377823,25121,26681,718,670,481718,480158,0,0,0,0,"tensor([491, 491, 574, 658, 760, 780, 788, 762, 710, 686, 309],
157
+ device='cuda:0')","tensor([26, 26, 34, 36, 38, 41, 44, 42, 43, 42, 11], device='cuda:0')"
158
+ 39,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_39/,autoencoders/matryoshka_othello/trainer_39/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,412.55999755859375,1013.3768310546876,1.5014727115631104,0.068603515625,0.9951813817024232,0.9976673722267152,0.9966961741447448,2.054067611694336,2.05408263206482,4.913147449493408,0.9999947547912598,1629,59000,3776000,0,400.2008056640625,400.2008056640625,0.3234314620494842,0.3234314620494842,0.3781732618808746,732796,732796,22587,22587,3043204,3043204,9184,9184,9098,9098,"tensor([36150, 36250, 36803, 38712, 40490, 39016, 37079, 32622, 26534, 19237,
159
+ 5990], device='cuda:0')","tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], device='cuda:0')",59000,2006000,0,400.2008056640625,400.2008056640625,0.0868618562817573,0.0868618562817573,0.1116931959986686,91163,91163,1871,1871,1914837,1914837,904,904,902,902,"tensor([2540, 2551, 2653, 2807, 2846, 2719, 2602, 2300, 1912, 1470, 547],
160
+ device='cuda:0')","tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], device='cuda:0')",59000,506839,2,400.2008056640625,204.20208740234372,0.0875452533364296,0.0915737897157669,0.1330818384885788,23230,24347,628,560,483609,482492,0,0,0,0,"tensor([521, 526, 597, 684, 745, 761, 790, 753, 730, 703, 339],
161
+ device='cuda:0')","tensor([26, 27, 32, 40, 45, 47, 47, 47, 46, 45, 11], device='cuda:0')"
matryoshka_othello/trainer_0/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3fd603b68ae7fd1b716dc946c6e03b3fa435f0894d31eefa57522f49df23877
3
+ size 16797965
matryoshka_othello/trainer_0/config.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "MatryoshkaBatchTopKTrainer",
4
+ "dict_class": "MatryoshkaBatchTopKSAE",
5
+ "lr": 0.0004,
6
+ "steps": 36621,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": null,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 256,
13
+ "seed": 42,
14
+ "activation_dim": 512,
15
+ "dict_size": 4096,
16
+ "group_fractions": [
17
+ 0.03125,
18
+ 0.0625,
19
+ 0.125,
20
+ 0.25,
21
+ 0.53125
22
+ ],
23
+ "group_weights": [
24
+ 0.2,
25
+ 0.2,
26
+ 0.2,
27
+ 0.2,
28
+ 0.2
29
+ ],
30
+ "group_sizes": [
31
+ 128,
32
+ 256,
33
+ 512,
34
+ 1024,
35
+ 2176
36
+ ],
37
+ "k": 10,
38
+ "device": "cuda:0",
39
+ "layer": 5,
40
+ "lm_name": "Baidicoot/Othello-GPT-Transformer-Lens",
41
+ "wandb_name": "MatryoshkaBatchTopKTrainer-othello-0_trainer_0",
42
+ "submodule_name": null
43
+ },
44
+ "buffer": {
45
+ "d_submodule": 512,
46
+ "io": "out",
47
+ "n_ctxs": 1000.0,
48
+ "ctx_len": 59,
49
+ "refresh_batch_size": 64,
50
+ "out_batch_size": 8192,
51
+ "device": "cuda:0"
52
+ }
53
+ }
matryoshka_othello/trainer_0/results.csv ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ ,autoencoder_group_path,autoencoder_path,reconstruction_file,trainer_class,sae_class,eval_sae_n_inputs,eval_results_n_inputs,board_reconstruction_n_inputs,l0,l1_loss,l2_loss,frac_alive,frac_variance_explained,cossim,l2_ratio,loss_original,loss_reconstructed,loss_zero,frac_recovered,num_alive_features,games_batch_to_state_stack_mine_yours_BLRRC_board_reconstruction_board_count,games_batch_to_state_stack_mine_yours_BLRRC_num_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_idx,games_batch_to_state_stack_mine_yours_BLRRC_zero_L0,games_batch_to_state_stack_mine_yours_BLRRC_best_L0,games_batch_to_state_stack_mine_yours_BLRRC_zero_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_best_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_last_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_true_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_true_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_false_negative_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_false_negative_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_multiple_classes,games_batch_to_state_stack_mine_yours_BLRRC_best_multiple_classes,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_high_precision_counts_per_T,games_batch_to_state_stack_mine_yours_BLRRC_high_precision_and_recall_counts_per_T,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_board_reconstruction_board_count,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_num_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_idx,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_L0,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_L0,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_last_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_true_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_true_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_false_negative_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_false_negative_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_multiple_classes,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_multiple_classes,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_high_precision_counts_per_T,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_high_precision_and_recall_counts_per_T,games_batch_to_valid_moves_BLRRC_board_reconstruction_board_count,games_batch_to_valid_moves_BLRRC_num_squares,games_batch_to_valid_moves_BLRRC_best_idx,games_batch_to_valid_moves_BLRRC_zero_L0,games_batch_to_valid_moves_BLRRC_best_L0,games_batch_to_valid_moves_BLRRC_zero_f1_score_per_class,games_batch_to_valid_moves_BLRRC_best_f1_score_per_class,games_batch_to_valid_moves_BLRRC_last_f1_score_per_class,games_batch_to_valid_moves_BLRRC_zero_num_true_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_true_positive_squares,games_batch_to_valid_moves_BLRRC_zero_num_false_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_false_positive_squares,games_batch_to_valid_moves_BLRRC_zero_num_false_negative_squares,games_batch_to_valid_moves_BLRRC_best_num_false_negative_squares,games_batch_to_valid_moves_BLRRC_zero_multiple_classes,games_batch_to_valid_moves_BLRRC_best_multiple_classes,games_batch_to_valid_moves_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_valid_moves_BLRRC_high_precision_counts_per_T,games_batch_to_valid_moves_BLRRC_high_precision_and_recall_counts_per_T
2
+ 0,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_0/,autoencoders/matryoshka_othello/trainer_0/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,9.920000076293945,54.032108306884766,12.195414543151855,0.0478515625,0.6925392150878906,0.8403303623199463,0.8648781180381775,2.054067611694336,2.5528128147125244,4.913147449493408,0.8255574703216553,2502,59000,3776000,0,10.004157066345215,10.004157066345215,0.6467792987823486,0.6467792987823486,0.7024305462837219,1826214,1826214,44887,44887,1949786,1949786,15201,15201,15119,15119,"tensor([18648, 18648, 18648, 18665, 18778, 19179, 19587, 20977, 21130, 15219,
3
+ 2861], device='cuda:0')","tensor([1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0], device='cuda:0')",59000,2006000,0,10.004157066345215,10.004157066345215,0.5270017385482788,0.5270017385482788,0.5584830641746521,722269,722269,12781,12781,1283731,1283731,3684,3684,3674,3674,"tensor([5886, 5886, 5886, 5889, 5909, 6019, 6171, 6596, 6358, 3406, 278],
4
+ device='cuda:0')","tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], device='cuda:0')",59000,506839,0,10.004157066345215,10.004157066345215,0.5241131782531738,0.5241131782531738,0.607089102268219,180916,180916,2615,2615,325923,325923,0,0,0,0,"tensor([611, 611, 611, 611, 615, 657, 712, 839, 921, 790, 179],
5
+ device='cuda:0')","tensor([31, 31, 31, 31, 32, 33, 31, 37, 39, 38, 10], device='cuda:0')"
matryoshka_othello/trainer_1/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:797198cf752918461345d0fabf5449c9dcbab5d24729a022ba35010023eb4349
3
+ size 16797965
matryoshka_othello/trainer_1/config.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "MatryoshkaBatchTopKTrainer",
4
+ "dict_class": "MatryoshkaBatchTopKSAE",
5
+ "lr": 0.0004,
6
+ "steps": 36621,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": null,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 256,
13
+ "seed": 42,
14
+ "activation_dim": 512,
15
+ "dict_size": 4096,
16
+ "group_fractions": [
17
+ 0.03125,
18
+ 0.0625,
19
+ 0.125,
20
+ 0.25,
21
+ 0.53125
22
+ ],
23
+ "group_weights": [
24
+ 0.2,
25
+ 0.2,
26
+ 0.2,
27
+ 0.2,
28
+ 0.2
29
+ ],
30
+ "group_sizes": [
31
+ 128,
32
+ 256,
33
+ 512,
34
+ 1024,
35
+ 2176
36
+ ],
37
+ "k": 30,
38
+ "device": "cuda:0",
39
+ "layer": 5,
40
+ "lm_name": "Baidicoot/Othello-GPT-Transformer-Lens",
41
+ "wandb_name": "MatryoshkaBatchTopKTrainer-othello-1_trainer_1",
42
+ "submodule_name": null
43
+ },
44
+ "buffer": {
45
+ "d_submodule": 512,
46
+ "io": "out",
47
+ "n_ctxs": 1000.0,
48
+ "ctx_len": 59,
49
+ "refresh_batch_size": 64,
50
+ "out_batch_size": 8192,
51
+ "device": "cuda:0"
52
+ }
53
+ }
matryoshka_othello/trainer_1/results.csv ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ ,autoencoder_group_path,autoencoder_path,reconstruction_file,trainer_class,sae_class,eval_sae_n_inputs,eval_results_n_inputs,board_reconstruction_n_inputs,l0,l1_loss,l2_loss,frac_alive,frac_variance_explained,cossim,l2_ratio,loss_original,loss_reconstructed,loss_zero,frac_recovered,num_alive_features,games_batch_to_state_stack_mine_yours_BLRRC_board_reconstruction_board_count,games_batch_to_state_stack_mine_yours_BLRRC_num_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_idx,games_batch_to_state_stack_mine_yours_BLRRC_zero_L0,games_batch_to_state_stack_mine_yours_BLRRC_best_L0,games_batch_to_state_stack_mine_yours_BLRRC_zero_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_best_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_last_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_true_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_true_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_false_negative_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_false_negative_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_multiple_classes,games_batch_to_state_stack_mine_yours_BLRRC_best_multiple_classes,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_high_precision_counts_per_T,games_batch_to_state_stack_mine_yours_BLRRC_high_precision_and_recall_counts_per_T,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_board_reconstruction_board_count,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_num_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_idx,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_L0,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_L0,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_last_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_true_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_true_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_false_negative_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_false_negative_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_multiple_classes,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_multiple_classes,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_high_precision_counts_per_T,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_high_precision_and_recall_counts_per_T,games_batch_to_valid_moves_BLRRC_board_reconstruction_board_count,games_batch_to_valid_moves_BLRRC_num_squares,games_batch_to_valid_moves_BLRRC_best_idx,games_batch_to_valid_moves_BLRRC_zero_L0,games_batch_to_valid_moves_BLRRC_best_L0,games_batch_to_valid_moves_BLRRC_zero_f1_score_per_class,games_batch_to_valid_moves_BLRRC_best_f1_score_per_class,games_batch_to_valid_moves_BLRRC_last_f1_score_per_class,games_batch_to_valid_moves_BLRRC_zero_num_true_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_true_positive_squares,games_batch_to_valid_moves_BLRRC_zero_num_false_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_false_positive_squares,games_batch_to_valid_moves_BLRRC_zero_num_false_negative_squares,games_batch_to_valid_moves_BLRRC_best_num_false_negative_squares,games_batch_to_valid_moves_BLRRC_zero_multiple_classes,games_batch_to_valid_moves_BLRRC_best_multiple_classes,games_batch_to_valid_moves_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_valid_moves_BLRRC_high_precision_counts_per_T,games_batch_to_valid_moves_BLRRC_high_precision_and_recall_counts_per_T
2
+ 0,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_1/,autoencoders/matryoshka_othello/trainer_1/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,30.31999969482422,113.8661117553711,8.712231636047363,0.092529296875,0.841371476650238,0.9205113649368286,0.9318574070930481,2.054067611694336,2.059459686279297,4.913147449493408,0.9981140494346619,1501,59000,3776000,1,29.974895477294922,29.966999053955078,0.7845427393913269,0.7854730486869812,0.8259817957878113,2470892,2476241,52043,52854,1305108,1299759,23814,24623,23594,24399,"tensor([11563, 11567, 11576, 11661, 11942, 12713, 13935, 15006, 15493, 13308,
3
+ 3167], device='cuda:0')","tensor([12, 12, 12, 12, 11, 9, 7, 3, 0, 0, 0], device='cuda:0')",59000,2006000,2,29.974895477294922,29.808122634887695,0.7183125615119934,0.7183449268341064,0.7515202164649963,1138210,1138225,24912,24796,867790,867775,8102,8062,8062,8022,"tensor([2389, 2389, 2391, 2433, 2571, 2781, 3069, 3256, 3104, 1964, 286],
4
+ device='cuda:0')","tensor([2, 2, 2, 2, 1, 0, 0, 0, 0, 0, 0], device='cuda:0')",59000,506839,4,29.974895477294922,25.32160758972168,0.5449132323265076,0.5775192379951477,0.8116542100906372,190988,206626,3158,2099,315851,300213,0,0,0,0,"tensor([224, 224, 224, 236, 262, 324, 388, 461, 504, 509, 172],
5
+ device='cuda:0')","tensor([44, 44, 44, 42, 42, 44, 28, 31, 30, 24, 6], device='cuda:0')"
matryoshka_othello/trainer_10/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65da00de6e35237cedf53c50bd17dfb9762c74bf76908540d062b8014fd2bd2e
3
+ size 16797965
matryoshka_othello/trainer_10/config.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "MatryoshkaBatchTopKTrainer",
4
+ "dict_class": "MatryoshkaBatchTopKSAE",
5
+ "lr": 0.0004,
6
+ "steps": 36621,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": null,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 256,
13
+ "seed": 42,
14
+ "activation_dim": 512,
15
+ "dict_size": 4096,
16
+ "group_fractions": [
17
+ 0.03125,
18
+ 0.0625,
19
+ 0.125,
20
+ 0.25,
21
+ 0.53125
22
+ ],
23
+ "group_weights": [
24
+ 0.2,
25
+ 0.2,
26
+ 0.2,
27
+ 0.2,
28
+ 0.2
29
+ ],
30
+ "group_sizes": [
31
+ 128,
32
+ 256,
33
+ 512,
34
+ 1024,
35
+ 2176
36
+ ],
37
+ "k": 215,
38
+ "device": "cuda:0",
39
+ "layer": 5,
40
+ "lm_name": "Baidicoot/Othello-GPT-Transformer-Lens",
41
+ "wandb_name": "MatryoshkaBatchTopKTrainer-othello-10_trainer_10",
42
+ "submodule_name": null
43
+ },
44
+ "buffer": {
45
+ "d_submodule": 512,
46
+ "io": "out",
47
+ "n_ctxs": 1000.0,
48
+ "ctx_len": 59,
49
+ "refresh_batch_size": 64,
50
+ "out_batch_size": 8192,
51
+ "device": "cuda:0"
52
+ }
53
+ }
matryoshka_othello/trainer_10/results.csv ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ ,autoencoder_group_path,autoencoder_path,reconstruction_file,trainer_class,sae_class,eval_sae_n_inputs,eval_results_n_inputs,board_reconstruction_n_inputs,l0,l1_loss,l2_loss,frac_alive,frac_variance_explained,cossim,l2_ratio,loss_original,loss_reconstructed,loss_zero,frac_recovered,num_alive_features,games_batch_to_state_stack_mine_yours_BLRRC_board_reconstruction_board_count,games_batch_to_state_stack_mine_yours_BLRRC_num_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_idx,games_batch_to_state_stack_mine_yours_BLRRC_zero_L0,games_batch_to_state_stack_mine_yours_BLRRC_best_L0,games_batch_to_state_stack_mine_yours_BLRRC_zero_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_best_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_last_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_true_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_true_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_false_negative_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_false_negative_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_multiple_classes,games_batch_to_state_stack_mine_yours_BLRRC_best_multiple_classes,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_high_precision_counts_per_T,games_batch_to_state_stack_mine_yours_BLRRC_high_precision_and_recall_counts_per_T,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_board_reconstruction_board_count,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_num_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_idx,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_L0,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_L0,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_last_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_true_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_true_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_false_negative_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_false_negative_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_multiple_classes,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_multiple_classes,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_high_precision_counts_per_T,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_high_precision_and_recall_counts_per_T,games_batch_to_valid_moves_BLRRC_board_reconstruction_board_count,games_batch_to_valid_moves_BLRRC_num_squares,games_batch_to_valid_moves_BLRRC_best_idx,games_batch_to_valid_moves_BLRRC_zero_L0,games_batch_to_valid_moves_BLRRC_best_L0,games_batch_to_valid_moves_BLRRC_zero_f1_score_per_class,games_batch_to_valid_moves_BLRRC_best_f1_score_per_class,games_batch_to_valid_moves_BLRRC_last_f1_score_per_class,games_batch_to_valid_moves_BLRRC_zero_num_true_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_true_positive_squares,games_batch_to_valid_moves_BLRRC_zero_num_false_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_false_positive_squares,games_batch_to_valid_moves_BLRRC_zero_num_false_negative_squares,games_batch_to_valid_moves_BLRRC_best_num_false_negative_squares,games_batch_to_valid_moves_BLRRC_zero_multiple_classes,games_batch_to_valid_moves_BLRRC_best_multiple_classes,games_batch_to_valid_moves_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_valid_moves_BLRRC_high_precision_counts_per_T,games_batch_to_valid_moves_BLRRC_high_precision_and_recall_counts_per_T
2
+ 0,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_10/,autoencoders/matryoshka_othello/trainer_10/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,228.59999084472656,534.2022705078125,3.2362053394317627,0.203369140625,0.9792519211769104,0.9899451732635498,0.9941858649253845,2.054067611694336,2.0541346073150635,4.913147449493408,0.9999765753746033,1707,59000,3776000,3,214.98684692382812,74.27256774902344,0.4247049391269684,0.5244215130805969,0.6110751628875732,1029604,1358190,42957,45575,2746396,2417810,9552,12230,9491,12201,"tensor([19891, 19995, 21180, 23371, 25260, 26835, 28276, 28982, 24922, 16921,
3
+ 3927], device='cuda:0')","tensor([ 7, 8, 16, 23, 22, 10, 10, 5, 0, 0, 0], device='cuda:0')",59000,2006000,3,214.98684692382812,74.27256774902344,0.1520613431930542,0.36109068989753723,0.43211233615875244,165258,443371,2312,6361,1840742,1562629,732,612,732,612,"tensor([2194, 2198, 2324, 2501, 2634, 2709, 2747, 2757, 2272, 1499, 369],
4
+ device='cuda:0')","tensor([ 0, 0, 8, 14, 13, 1, 1, 0, 0, 0, 0], device='cuda:0')",59000,506839,4,214.98684692382812,43.13955307006836,0.0960441380739212,0.1836436539888382,0.3245460093021393,25590,51334,451,888,481249,455505,0,0,0,0,"tensor([381, 381, 397, 454, 530, 592, 628, 681, 681, 634, 240],
5
+ device='cuda:0')","tensor([17, 17, 17, 20, 31, 32, 33, 40, 46, 44, 11], device='cuda:0')"
matryoshka_othello/trainer_11/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0730711098947fce0dd0d60160a49ef3f9ac6cc9aa54959998e6a5b5ad02d470
3
+ size 16797965
matryoshka_othello/trainer_11/config.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "MatryoshkaBatchTopKTrainer",
4
+ "dict_class": "MatryoshkaBatchTopKSAE",
5
+ "lr": 0.0004,
6
+ "steps": 36621,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": null,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 256,
13
+ "seed": 42,
14
+ "activation_dim": 512,
15
+ "dict_size": 4096,
16
+ "group_fractions": [
17
+ 0.03125,
18
+ 0.0625,
19
+ 0.125,
20
+ 0.25,
21
+ 0.53125
22
+ ],
23
+ "group_weights": [
24
+ 0.2,
25
+ 0.2,
26
+ 0.2,
27
+ 0.2,
28
+ 0.2
29
+ ],
30
+ "group_sizes": [
31
+ 128,
32
+ 256,
33
+ 512,
34
+ 1024,
35
+ 2176
36
+ ],
37
+ "k": 235,
38
+ "device": "cuda:0",
39
+ "layer": 5,
40
+ "lm_name": "Baidicoot/Othello-GPT-Transformer-Lens",
41
+ "wandb_name": "MatryoshkaBatchTopKTrainer-othello-11_trainer_11",
42
+ "submodule_name": null
43
+ },
44
+ "buffer": {
45
+ "d_submodule": 512,
46
+ "io": "out",
47
+ "n_ctxs": 1000.0,
48
+ "ctx_len": 59,
49
+ "refresh_batch_size": 64,
50
+ "out_batch_size": 8192,
51
+ "device": "cuda:0"
52
+ }
53
+ }
matryoshka_othello/trainer_11/results.csv ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ ,autoencoder_group_path,autoencoder_path,reconstruction_file,trainer_class,sae_class,eval_sae_n_inputs,eval_results_n_inputs,board_reconstruction_n_inputs,l0,l1_loss,l2_loss,frac_alive,frac_variance_explained,cossim,l2_ratio,loss_original,loss_reconstructed,loss_zero,frac_recovered,num_alive_features,games_batch_to_state_stack_mine_yours_BLRRC_board_reconstruction_board_count,games_batch_to_state_stack_mine_yours_BLRRC_num_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_idx,games_batch_to_state_stack_mine_yours_BLRRC_zero_L0,games_batch_to_state_stack_mine_yours_BLRRC_best_L0,games_batch_to_state_stack_mine_yours_BLRRC_zero_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_best_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_last_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_true_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_true_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_false_negative_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_false_negative_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_multiple_classes,games_batch_to_state_stack_mine_yours_BLRRC_best_multiple_classes,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_high_precision_counts_per_T,games_batch_to_state_stack_mine_yours_BLRRC_high_precision_and_recall_counts_per_T,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_board_reconstruction_board_count,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_num_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_idx,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_L0,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_L0,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_last_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_true_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_true_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_false_negative_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_false_negative_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_multiple_classes,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_multiple_classes,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_high_precision_counts_per_T,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_high_precision_and_recall_counts_per_T,games_batch_to_valid_moves_BLRRC_board_reconstruction_board_count,games_batch_to_valid_moves_BLRRC_num_squares,games_batch_to_valid_moves_BLRRC_best_idx,games_batch_to_valid_moves_BLRRC_zero_L0,games_batch_to_valid_moves_BLRRC_best_L0,games_batch_to_valid_moves_BLRRC_zero_f1_score_per_class,games_batch_to_valid_moves_BLRRC_best_f1_score_per_class,games_batch_to_valid_moves_BLRRC_last_f1_score_per_class,games_batch_to_valid_moves_BLRRC_zero_num_true_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_true_positive_squares,games_batch_to_valid_moves_BLRRC_zero_num_false_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_false_positive_squares,games_batch_to_valid_moves_BLRRC_zero_num_false_negative_squares,games_batch_to_valid_moves_BLRRC_best_num_false_negative_squares,games_batch_to_valid_moves_BLRRC_zero_multiple_classes,games_batch_to_valid_moves_BLRRC_best_multiple_classes,games_batch_to_valid_moves_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_valid_moves_BLRRC_high_precision_counts_per_T,games_batch_to_valid_moves_BLRRC_high_precision_and_recall_counts_per_T
2
+ 0,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_11/,autoencoders/matryoshka_othello/trainer_11/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,247.1199951171875,595.2257080078125,3.062748432159424,0.199951171875,0.9814200401306152,0.9909713268280029,0.9953888654708862,2.054067611694336,2.0539865493774414,4.913147449493408,1.000028371810913,1615,59000,3776000,3,235.28720092773438,77.79491424560547,0.4016973674297333,0.49049556255340576,0.5835108757019043,958851,1240953,39146,43044,2817149,2535047,8322,10563,8267,10533,"tensor([21175, 21216, 22265, 24300, 26572, 28573, 29297, 27666, 24001, 17062,
3
+ 4198], device='cuda:0')","tensor([ 5, 7, 15, 21, 19, 8, 7, 5, 0, 0, 0], device='cuda:0')",59000,2006000,3,235.28720092773438,77.79491424560547,0.13514931499958038,0.33908507227897644,0.4126876890659332,145540,410807,2226,6225,1860460,1595193,702,546,702,546,"tensor([2039, 2040, 2170, 2354, 2497, 2652, 2688, 2496, 2131, 1519, 398],
4
+ device='cuda:0')","tensor([ 0, 0, 8, 14, 11, 0, 0, 0, 0, 0, 0], device='cuda:0')",59000,506839,8,235.28720092773438,2.394713878631592,0.09019505232572556,0.13707506656646729,0.2549251914024353,23960,37335,494,564,482879,469504,0,0,0,0,"tensor([397, 397, 422, 478, 565, 631, 674, 684, 711, 654, 245],
5
+ device='cuda:0')","tensor([22, 22, 23, 28, 35, 38, 45, 46, 51, 44, 12], device='cuda:0')"
matryoshka_othello/trainer_12/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7afe883d17647a51d9c7347c68120908c42f35634f983714307af47831a39819
3
+ size 16797965
matryoshka_othello/trainer_12/config.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "MatryoshkaBatchTopKTrainer",
4
+ "dict_class": "MatryoshkaBatchTopKSAE",
5
+ "lr": 0.0004,
6
+ "steps": 36621,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": null,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 256,
13
+ "seed": 42,
14
+ "activation_dim": 512,
15
+ "dict_size": 4096,
16
+ "group_fractions": [
17
+ 0.03125,
18
+ 0.0625,
19
+ 0.125,
20
+ 0.25,
21
+ 0.53125
22
+ ],
23
+ "group_weights": [
24
+ 0.2,
25
+ 0.2,
26
+ 0.2,
27
+ 0.2,
28
+ 0.2
29
+ ],
30
+ "group_sizes": [
31
+ 128,
32
+ 256,
33
+ 512,
34
+ 1024,
35
+ 2176
36
+ ],
37
+ "k": 256,
38
+ "device": "cuda:0",
39
+ "layer": 5,
40
+ "lm_name": "Baidicoot/Othello-GPT-Transformer-Lens",
41
+ "wandb_name": "MatryoshkaBatchTopKTrainer-othello-12_trainer_12",
42
+ "submodule_name": null
43
+ },
44
+ "buffer": {
45
+ "d_submodule": 512,
46
+ "io": "out",
47
+ "n_ctxs": 1000.0,
48
+ "ctx_len": 59,
49
+ "refresh_batch_size": 64,
50
+ "out_batch_size": 8192,
51
+ "device": "cuda:0"
52
+ }
53
+ }
matryoshka_othello/trainer_12/results.csv ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ ,autoencoder_group_path,autoencoder_path,reconstruction_file,trainer_class,sae_class,eval_sae_n_inputs,eval_results_n_inputs,board_reconstruction_n_inputs,l0,l1_loss,l2_loss,frac_alive,frac_variance_explained,cossim,l2_ratio,loss_original,loss_reconstructed,loss_zero,frac_recovered,num_alive_features,games_batch_to_state_stack_mine_yours_BLRRC_board_reconstruction_board_count,games_batch_to_state_stack_mine_yours_BLRRC_num_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_idx,games_batch_to_state_stack_mine_yours_BLRRC_zero_L0,games_batch_to_state_stack_mine_yours_BLRRC_best_L0,games_batch_to_state_stack_mine_yours_BLRRC_zero_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_best_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_last_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_true_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_true_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_false_negative_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_false_negative_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_multiple_classes,games_batch_to_state_stack_mine_yours_BLRRC_best_multiple_classes,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_high_precision_counts_per_T,games_batch_to_state_stack_mine_yours_BLRRC_high_precision_and_recall_counts_per_T,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_board_reconstruction_board_count,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_num_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_idx,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_L0,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_L0,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_last_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_true_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_true_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_false_negative_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_false_negative_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_multiple_classes,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_multiple_classes,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_high_precision_counts_per_T,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_high_precision_and_recall_counts_per_T,games_batch_to_valid_moves_BLRRC_board_reconstruction_board_count,games_batch_to_valid_moves_BLRRC_num_squares,games_batch_to_valid_moves_BLRRC_best_idx,games_batch_to_valid_moves_BLRRC_zero_L0,games_batch_to_valid_moves_BLRRC_best_L0,games_batch_to_valid_moves_BLRRC_zero_f1_score_per_class,games_batch_to_valid_moves_BLRRC_best_f1_score_per_class,games_batch_to_valid_moves_BLRRC_last_f1_score_per_class,games_batch_to_valid_moves_BLRRC_zero_num_true_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_true_positive_squares,games_batch_to_valid_moves_BLRRC_zero_num_false_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_false_positive_squares,games_batch_to_valid_moves_BLRRC_zero_num_false_negative_squares,games_batch_to_valid_moves_BLRRC_best_num_false_negative_squares,games_batch_to_valid_moves_BLRRC_zero_multiple_classes,games_batch_to_valid_moves_BLRRC_best_multiple_classes,games_batch_to_valid_moves_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_valid_moves_BLRRC_high_precision_counts_per_T,games_batch_to_valid_moves_BLRRC_high_precision_and_recall_counts_per_T
2
+ 0,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_12/,autoencoders/matryoshka_othello/trainer_12/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,271.55999755859375,654.33203125,2.8832952976226807,0.192138671875,0.9833529591560364,0.9918925166130066,0.9966205954551697,2.054067611694336,2.054128408432007,4.913147449493408,0.9999787211418152,1575,59000,3776000,3,256.135986328125,82.25318908691406,0.3809117376804352,0.468325138092041,0.5484753847122192,896579,1167424,34963,42104,2879421,2608576,8640,10343,8573,10315,"tensor([22183, 22188, 23469, 25735, 27816, 29114, 29468, 27806, 23865, 16459,
3
+ 3946], device='cuda:0')","tensor([ 2, 5, 9, 16, 13, 7, 7, 5, 0, 0, 0], device='cuda:0')",59000,2006000,3,256.135986328125,82.25318908691406,0.12387917190790176,0.29024532437324524,0.35326382517814636,132587,341516,1999,5776,1873413,1664484,791,551,790,551,"tensor([1956, 1956, 2119, 2333, 2468, 2534, 2539, 2347, 2021, 1441, 381],
4
+ device='cuda:0')","tensor([0, 0, 2, 9, 6, 0, 0, 0, 0, 0, 0], device='cuda:0')",59000,506839,8,256.135986328125,2.2150840759277344,0.08993130177259445,0.12649564445018768,0.23402975499629974,23888,34248,523,402,482951,472591,0,0,0,0,"tensor([386, 386, 420, 512, 584, 628, 671, 698, 707, 638, 241],
5
+ device='cuda:0')","tensor([22, 22, 23, 29, 36, 37, 44, 44, 49, 47, 8], device='cuda:0')"
matryoshka_othello/trainer_13/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1039398476b32f1c5836209f1ffc6bf66d3913389778b97f3959a660a18b32c
3
+ size 16797965
matryoshka_othello/trainer_13/config.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "MatryoshkaBatchTopKTrainer",
4
+ "dict_class": "MatryoshkaBatchTopKSAE",
5
+ "lr": 0.0004,
6
+ "steps": 36621,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": null,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 256,
13
+ "seed": 42,
14
+ "activation_dim": 512,
15
+ "dict_size": 4096,
16
+ "group_fractions": [
17
+ 0.03125,
18
+ 0.0625,
19
+ 0.125,
20
+ 0.25,
21
+ 0.53125
22
+ ],
23
+ "group_weights": [
24
+ 0.2,
25
+ 0.2,
26
+ 0.2,
27
+ 0.2,
28
+ 0.2
29
+ ],
30
+ "group_sizes": [
31
+ 128,
32
+ 256,
33
+ 512,
34
+ 1024,
35
+ 2176
36
+ ],
37
+ "k": 276,
38
+ "device": "cuda:0",
39
+ "layer": 5,
40
+ "lm_name": "Baidicoot/Othello-GPT-Transformer-Lens",
41
+ "wandb_name": "MatryoshkaBatchTopKTrainer-othello-13_trainer_13",
42
+ "submodule_name": null
43
+ },
44
+ "buffer": {
45
+ "d_submodule": 512,
46
+ "io": "out",
47
+ "n_ctxs": 1000.0,
48
+ "ctx_len": 59,
49
+ "refresh_batch_size": 64,
50
+ "out_batch_size": 8192,
51
+ "device": "cuda:0"
52
+ }
53
+ }
matryoshka_othello/trainer_13/results.csv ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ ,autoencoder_group_path,autoencoder_path,reconstruction_file,trainer_class,sae_class,eval_sae_n_inputs,eval_results_n_inputs,board_reconstruction_n_inputs,l0,l1_loss,l2_loss,frac_alive,frac_variance_explained,cossim,l2_ratio,loss_original,loss_reconstructed,loss_zero,frac_recovered,num_alive_features,games_batch_to_state_stack_mine_yours_BLRRC_board_reconstruction_board_count,games_batch_to_state_stack_mine_yours_BLRRC_num_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_idx,games_batch_to_state_stack_mine_yours_BLRRC_zero_L0,games_batch_to_state_stack_mine_yours_BLRRC_best_L0,games_batch_to_state_stack_mine_yours_BLRRC_zero_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_best_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_last_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_true_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_true_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_false_negative_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_false_negative_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_multiple_classes,games_batch_to_state_stack_mine_yours_BLRRC_best_multiple_classes,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_high_precision_counts_per_T,games_batch_to_state_stack_mine_yours_BLRRC_high_precision_and_recall_counts_per_T,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_board_reconstruction_board_count,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_num_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_idx,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_L0,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_L0,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_last_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_true_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_true_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_false_negative_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_false_negative_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_multiple_classes,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_multiple_classes,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_high_precision_counts_per_T,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_high_precision_and_recall_counts_per_T,games_batch_to_valid_moves_BLRRC_board_reconstruction_board_count,games_batch_to_valid_moves_BLRRC_num_squares,games_batch_to_valid_moves_BLRRC_best_idx,games_batch_to_valid_moves_BLRRC_zero_L0,games_batch_to_valid_moves_BLRRC_best_L0,games_batch_to_valid_moves_BLRRC_zero_f1_score_per_class,games_batch_to_valid_moves_BLRRC_best_f1_score_per_class,games_batch_to_valid_moves_BLRRC_last_f1_score_per_class,games_batch_to_valid_moves_BLRRC_zero_num_true_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_true_positive_squares,games_batch_to_valid_moves_BLRRC_zero_num_false_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_false_positive_squares,games_batch_to_valid_moves_BLRRC_zero_num_false_negative_squares,games_batch_to_valid_moves_BLRRC_best_num_false_negative_squares,games_batch_to_valid_moves_BLRRC_zero_multiple_classes,games_batch_to_valid_moves_BLRRC_best_multiple_classes,games_batch_to_valid_moves_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_valid_moves_BLRRC_high_precision_counts_per_T,games_batch_to_valid_moves_BLRRC_high_precision_and_recall_counts_per_T
2
+ 0,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_13/,autoencoders/matryoshka_othello/trainer_13/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,289.8399963378906,707.417236328125,2.697723865509033,0.1826171875,0.9854848980903625,0.9929239153862,0.9954584836959839,2.054067611694336,2.054515838623047,4.913147449493408,0.9998432397842407,1497,59000,3776000,3,276.1554870605469,88.50450897216797,0.37100639939308167,0.4198465943336487,0.508614718914032,867364,1012249,32372,33746,2908636,2763751,7857,8219,7802,8186,"tensor([22680, 22685, 23319, 25833, 28213, 29363, 29542, 28197, 23753, 16424,
3
+ 4091], device='cuda:0')","tensor([1, 5, 5, 8, 6, 5, 5, 3, 0, 0, 0], device='cuda:0')",59000,2006000,3,276.1554870605469,88.50450897216797,0.117937833070755,0.2163107842206955,0.2852068543434143,125811,243816,1703,4496,1880189,1762184,681,396,681,396,"tensor([1958, 1958, 2052, 2268, 2419, 2487, 2450, 2332, 1996, 1390, 393],
4
+ device='cuda:0')","tensor([0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0], device='cuda:0')",59000,506839,8,276.1554870605469,2.1634888648986816,0.08815542608499527,0.11622727662324905,0.20241819322109222,23392,31291,468,315,483447,475548,0,0,0,0,"tensor([397, 397, 433, 532, 599, 630, 685, 713, 738, 625, 243],
5
+ device='cuda:0')","tensor([22, 22, 26, 30, 32, 35, 41, 44, 51, 39, 12], device='cuda:0')"
matryoshka_othello/trainer_14/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69520c156095fa8de182600d05d5eff1045e2ab45eca42b742a615bd1c0c37d9
3
+ size 16797965
matryoshka_othello/trainer_14/config.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "MatryoshkaBatchTopKTrainer",
4
+ "dict_class": "MatryoshkaBatchTopKSAE",
5
+ "lr": 0.0004,
6
+ "steps": 36621,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": null,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 256,
13
+ "seed": 42,
14
+ "activation_dim": 512,
15
+ "dict_size": 4096,
16
+ "group_fractions": [
17
+ 0.03125,
18
+ 0.0625,
19
+ 0.125,
20
+ 0.25,
21
+ 0.53125
22
+ ],
23
+ "group_weights": [
24
+ 0.2,
25
+ 0.2,
26
+ 0.2,
27
+ 0.2,
28
+ 0.2
29
+ ],
30
+ "group_sizes": [
31
+ 128,
32
+ 256,
33
+ 512,
34
+ 1024,
35
+ 2176
36
+ ],
37
+ "k": 297,
38
+ "device": "cuda:0",
39
+ "layer": 5,
40
+ "lm_name": "Baidicoot/Othello-GPT-Transformer-Lens",
41
+ "wandb_name": "MatryoshkaBatchTopKTrainer-othello-14_trainer_14",
42
+ "submodule_name": null
43
+ },
44
+ "buffer": {
45
+ "d_submodule": 512,
46
+ "io": "out",
47
+ "n_ctxs": 1000.0,
48
+ "ctx_len": 59,
49
+ "refresh_batch_size": 64,
50
+ "out_batch_size": 8192,
51
+ "device": "cuda:0"
52
+ }
53
+ }
matryoshka_othello/trainer_14/results.csv ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ ,autoencoder_group_path,autoencoder_path,reconstruction_file,trainer_class,sae_class,eval_sae_n_inputs,eval_results_n_inputs,board_reconstruction_n_inputs,l0,l1_loss,l2_loss,frac_alive,frac_variance_explained,cossim,l2_ratio,loss_original,loss_reconstructed,loss_zero,frac_recovered,num_alive_features,games_batch_to_state_stack_mine_yours_BLRRC_board_reconstruction_board_count,games_batch_to_state_stack_mine_yours_BLRRC_num_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_idx,games_batch_to_state_stack_mine_yours_BLRRC_zero_L0,games_batch_to_state_stack_mine_yours_BLRRC_best_L0,games_batch_to_state_stack_mine_yours_BLRRC_zero_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_best_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_last_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_true_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_true_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_false_negative_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_false_negative_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_multiple_classes,games_batch_to_state_stack_mine_yours_BLRRC_best_multiple_classes,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_high_precision_counts_per_T,games_batch_to_state_stack_mine_yours_BLRRC_high_precision_and_recall_counts_per_T,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_board_reconstruction_board_count,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_num_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_idx,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_L0,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_L0,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_last_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_true_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_true_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_false_negative_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_false_negative_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_multiple_classes,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_multiple_classes,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_high_precision_counts_per_T,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_high_precision_and_recall_counts_per_T,games_batch_to_valid_moves_BLRRC_board_reconstruction_board_count,games_batch_to_valid_moves_BLRRC_num_squares,games_batch_to_valid_moves_BLRRC_best_idx,games_batch_to_valid_moves_BLRRC_zero_L0,games_batch_to_valid_moves_BLRRC_best_L0,games_batch_to_valid_moves_BLRRC_zero_f1_score_per_class,games_batch_to_valid_moves_BLRRC_best_f1_score_per_class,games_batch_to_valid_moves_BLRRC_last_f1_score_per_class,games_batch_to_valid_moves_BLRRC_zero_num_true_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_true_positive_squares,games_batch_to_valid_moves_BLRRC_zero_num_false_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_false_positive_squares,games_batch_to_valid_moves_BLRRC_zero_num_false_negative_squares,games_batch_to_valid_moves_BLRRC_best_num_false_negative_squares,games_batch_to_valid_moves_BLRRC_zero_multiple_classes,games_batch_to_valid_moves_BLRRC_best_multiple_classes,games_batch_to_valid_moves_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_valid_moves_BLRRC_high_precision_counts_per_T,games_batch_to_valid_moves_BLRRC_high_precision_and_recall_counts_per_T
2
+ 0,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_14/,autoencoders/matryoshka_othello/trainer_14/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,310.8399963378906,764.2993774414062,2.615259885787964,0.169189453125,0.9864574074745178,0.9933949112892151,0.9952017068862915,2.054067611694336,2.0541067123413086,4.913147449493408,0.9999863505363464,1443,59000,3776000,3,297.3014221191406,95.52342224121094,0.3599364757537842,0.38196372985839844,0.483002245426178,835100,900207,29163,37366,2940900,2875793,8070,7924,7996,7895,"tensor([22859, 22872, 23669, 26252, 28502, 30156, 30299, 28702, 24374, 16114,
3
+ 4956], device='cuda:0')","tensor([0, 0, 0, 1, 2, 3, 1, 0, 0, 0, 0], device='cuda:0')",59000,2006000,3,297.3014221191406,95.52342224121094,0.11666011065244675,0.15745967626571655,0.2267763465642929,124371,171669,1823,2813,1881629,1834331,648,440,647,440,"tensor([1895, 1895, 2001, 2234, 2377, 2476, 2468, 2321, 1992, 1351, 469],
4
+ device='cuda:0')","tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], device='cuda:0')",59000,506839,3,297.3014221191406,95.52342224121094,0.09342960268259048,0.09662330895662308,0.17053866386413574,24859,25749,446,389,481980,481090,0,0,0,0,"tensor([399, 401, 447, 535, 599, 660, 688, 694, 707, 618, 298],
5
+ device='cuda:0')","tensor([21, 21, 21, 31, 31, 42, 40, 41, 43, 43, 16], device='cuda:0')"
matryoshka_othello/trainer_15/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d79deeadf32591eb37df9c13f686eebe3731479280847341e055fcc88e911ae
3
+ size 16797965
matryoshka_othello/trainer_15/config.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "MatryoshkaBatchTopKTrainer",
4
+ "dict_class": "MatryoshkaBatchTopKSAE",
5
+ "lr": 0.0004,
6
+ "steps": 36621,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": null,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 256,
13
+ "seed": 42,
14
+ "activation_dim": 512,
15
+ "dict_size": 4096,
16
+ "group_fractions": [
17
+ 0.03125,
18
+ 0.0625,
19
+ 0.125,
20
+ 0.25,
21
+ 0.53125
22
+ ],
23
+ "group_weights": [
24
+ 0.2,
25
+ 0.2,
26
+ 0.2,
27
+ 0.2,
28
+ 0.2
29
+ ],
30
+ "group_sizes": [
31
+ 128,
32
+ 256,
33
+ 512,
34
+ 1024,
35
+ 2176
36
+ ],
37
+ "k": 317,
38
+ "device": "cuda:0",
39
+ "layer": 5,
40
+ "lm_name": "Baidicoot/Othello-GPT-Transformer-Lens",
41
+ "wandb_name": "MatryoshkaBatchTopKTrainer-othello-15_trainer_15",
42
+ "submodule_name": null
43
+ },
44
+ "buffer": {
45
+ "d_submodule": 512,
46
+ "io": "out",
47
+ "n_ctxs": 1000.0,
48
+ "ctx_len": 59,
49
+ "refresh_batch_size": 64,
50
+ "out_batch_size": 8192,
51
+ "device": "cuda:0"
52
+ }
53
+ }
matryoshka_othello/trainer_15/results.csv ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ ,autoencoder_group_path,autoencoder_path,reconstruction_file,trainer_class,sae_class,eval_sae_n_inputs,eval_results_n_inputs,board_reconstruction_n_inputs,l0,l1_loss,l2_loss,frac_alive,frac_variance_explained,cossim,l2_ratio,loss_original,loss_reconstructed,loss_zero,frac_recovered,num_alive_features,games_batch_to_state_stack_mine_yours_BLRRC_board_reconstruction_board_count,games_batch_to_state_stack_mine_yours_BLRRC_num_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_idx,games_batch_to_state_stack_mine_yours_BLRRC_zero_L0,games_batch_to_state_stack_mine_yours_BLRRC_best_L0,games_batch_to_state_stack_mine_yours_BLRRC_zero_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_best_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_last_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_true_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_true_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_false_negative_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_false_negative_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_multiple_classes,games_batch_to_state_stack_mine_yours_BLRRC_best_multiple_classes,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_high_precision_counts_per_T,games_batch_to_state_stack_mine_yours_BLRRC_high_precision_and_recall_counts_per_T,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_board_reconstruction_board_count,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_num_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_idx,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_L0,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_L0,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_last_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_true_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_true_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_false_negative_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_false_negative_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_multiple_classes,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_multiple_classes,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_high_precision_counts_per_T,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_high_precision_and_recall_counts_per_T,games_batch_to_valid_moves_BLRRC_board_reconstruction_board_count,games_batch_to_valid_moves_BLRRC_num_squares,games_batch_to_valid_moves_BLRRC_best_idx,games_batch_to_valid_moves_BLRRC_zero_L0,games_batch_to_valid_moves_BLRRC_best_L0,games_batch_to_valid_moves_BLRRC_zero_f1_score_per_class,games_batch_to_valid_moves_BLRRC_best_f1_score_per_class,games_batch_to_valid_moves_BLRRC_last_f1_score_per_class,games_batch_to_valid_moves_BLRRC_zero_num_true_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_true_positive_squares,games_batch_to_valid_moves_BLRRC_zero_num_false_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_false_positive_squares,games_batch_to_valid_moves_BLRRC_zero_num_false_negative_squares,games_batch_to_valid_moves_BLRRC_best_num_false_negative_squares,games_batch_to_valid_moves_BLRRC_zero_multiple_classes,games_batch_to_valid_moves_BLRRC_best_multiple_classes,games_batch_to_valid_moves_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_valid_moves_BLRRC_high_precision_counts_per_T,games_batch_to_valid_moves_BLRRC_high_precision_and_recall_counts_per_T
2
+ 0,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_15/,autoencoders/matryoshka_othello/trainer_15/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,333.91998291015625,819.3239135742188,2.465003490447998,0.1611328125,0.9879618287086487,0.9940750002861023,0.995798647403717,2.054067611694336,2.0541460514068604,4.913147449493408,0.9999725818634033,1403,59000,3776000,0,317.4093933105469,317.4093933105469,0.35228338837623596,0.35228338837623596,0.4444047808647156,813362,813362,28295,28295,2962638,2962638,7702,7702,7654,7654,"tensor([23185, 23195, 23981, 26259, 28147, 29593, 30329, 28607, 23925, 16137,
3
+ 3954], device='cuda:0')","tensor([0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0], device='cuda:0')",59000,2006000,2,317.4093933105469,174.72747802734375,0.1087888553738594,0.10901494324207306,0.16127271950244904,115494,115750,1775,1812,1890506,1890250,701,573,701,573,"tensor([1878, 1883, 1993, 2184, 2318, 2406, 2442, 2297, 1935, 1329, 370],
4
+ device='cuda:0')","tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], device='cuda:0')",59000,506839,3,317.4093933105469,104.05632781982422,0.08447640389204025,0.0918102115392685,0.14653293788433075,22374,24406,497,417,484465,482433,0,0,0,0,"tensor([376, 378, 434, 555, 608, 658, 701, 723, 719, 625, 231],
5
+ device='cuda:0')","tensor([20, 21, 23, 32, 41, 42, 43, 43, 48, 42, 11], device='cuda:0')"
matryoshka_othello/trainer_16/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:968a3a947b00de1c15d774b21fd567d4e4823d8e1a6941dde8177c6ca41a2480
3
+ size 16797965
matryoshka_othello/trainer_16/config.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "MatryoshkaBatchTopKTrainer",
4
+ "dict_class": "MatryoshkaBatchTopKSAE",
5
+ "lr": 0.0004,
6
+ "steps": 36621,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": null,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 256,
13
+ "seed": 42,
14
+ "activation_dim": 512,
15
+ "dict_size": 4096,
16
+ "group_fractions": [
17
+ 0.03125,
18
+ 0.0625,
19
+ 0.125,
20
+ 0.25,
21
+ 0.53125
22
+ ],
23
+ "group_weights": [
24
+ 0.2,
25
+ 0.2,
26
+ 0.2,
27
+ 0.2,
28
+ 0.2
29
+ ],
30
+ "group_sizes": [
31
+ 128,
32
+ 256,
33
+ 512,
34
+ 1024,
35
+ 2176
36
+ ],
37
+ "k": 338,
38
+ "device": "cuda:0",
39
+ "layer": 5,
40
+ "lm_name": "Baidicoot/Othello-GPT-Transformer-Lens",
41
+ "wandb_name": "MatryoshkaBatchTopKTrainer-othello-16_trainer_16",
42
+ "submodule_name": null
43
+ },
44
+ "buffer": {
45
+ "d_submodule": 512,
46
+ "io": "out",
47
+ "n_ctxs": 1000.0,
48
+ "ctx_len": 59,
49
+ "refresh_batch_size": 64,
50
+ "out_batch_size": 8192,
51
+ "device": "cuda:0"
52
+ }
53
+ }
matryoshka_othello/trainer_16/results.csv ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ ,autoencoder_group_path,autoencoder_path,reconstruction_file,trainer_class,sae_class,eval_sae_n_inputs,eval_results_n_inputs,board_reconstruction_n_inputs,l0,l1_loss,l2_loss,frac_alive,frac_variance_explained,cossim,l2_ratio,loss_original,loss_reconstructed,loss_zero,frac_recovered,num_alive_features,games_batch_to_state_stack_mine_yours_BLRRC_board_reconstruction_board_count,games_batch_to_state_stack_mine_yours_BLRRC_num_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_idx,games_batch_to_state_stack_mine_yours_BLRRC_zero_L0,games_batch_to_state_stack_mine_yours_BLRRC_best_L0,games_batch_to_state_stack_mine_yours_BLRRC_zero_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_best_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_last_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_true_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_true_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_false_negative_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_false_negative_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_multiple_classes,games_batch_to_state_stack_mine_yours_BLRRC_best_multiple_classes,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_high_precision_counts_per_T,games_batch_to_state_stack_mine_yours_BLRRC_high_precision_and_recall_counts_per_T,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_board_reconstruction_board_count,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_num_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_idx,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_L0,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_L0,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_last_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_true_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_true_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_false_negative_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_false_negative_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_multiple_classes,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_multiple_classes,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_high_precision_counts_per_T,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_high_precision_and_recall_counts_per_T,games_batch_to_valid_moves_BLRRC_board_reconstruction_board_count,games_batch_to_valid_moves_BLRRC_num_squares,games_batch_to_valid_moves_BLRRC_best_idx,games_batch_to_valid_moves_BLRRC_zero_L0,games_batch_to_valid_moves_BLRRC_best_L0,games_batch_to_valid_moves_BLRRC_zero_f1_score_per_class,games_batch_to_valid_moves_BLRRC_best_f1_score_per_class,games_batch_to_valid_moves_BLRRC_last_f1_score_per_class,games_batch_to_valid_moves_BLRRC_zero_num_true_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_true_positive_squares,games_batch_to_valid_moves_BLRRC_zero_num_false_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_false_positive_squares,games_batch_to_valid_moves_BLRRC_zero_num_false_negative_squares,games_batch_to_valid_moves_BLRRC_best_num_false_negative_squares,games_batch_to_valid_moves_BLRRC_zero_multiple_classes,games_batch_to_valid_moves_BLRRC_best_multiple_classes,games_batch_to_valid_moves_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_valid_moves_BLRRC_high_precision_counts_per_T,games_batch_to_valid_moves_BLRRC_high_precision_and_recall_counts_per_T
2
+ 0,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_16/,autoencoders/matryoshka_othello/trainer_16/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,354.55999755859375,872.7642822265625,2.2573609352111816,0.153564453125,0.9898377060890198,0.9950308799743652,0.9959880113601685,2.054067611694336,2.05419659614563,4.913147449493408,0.999954879283905,1357,59000,3776000,0,338.1923828125,338.1923828125,0.34010589122772217,0.34010589122772217,0.4206508994102478,779105,779105,26439,26439,2996895,2996895,6872,6872,6815,6815,"tensor([23346, 23359, 23800, 26144, 27963, 29101, 28679, 26974, 22132, 15491,
3
+ 4451], device='cuda:0')","tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], device='cuda:0')",59000,2006000,1,338.1923828125,290.7479553222656,0.09700101613998413,0.0970679447054863,0.12913531064987183,102337,102411,1682,1678,1903663,1903589,570,569,569,568,"tensor([1883, 1885, 1979, 2201, 2304, 2344, 2263, 2104, 1750, 1273, 426],
4
+ device='cuda:0')","tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], device='cuda:0')",59000,506839,3,338.1923828125,112.71033477783203,0.07547971606254578,0.08508501201868057,0.12517893314361572,19898,22536,504,354,486941,484303,0,0,0,0,"tensor([393, 395, 440, 560, 631, 674, 670, 676, 677, 607, 277],
5
+ device='cuda:0')","tensor([24, 24, 25, 35, 36, 39, 43, 47, 44, 38, 13], device='cuda:0')"
matryoshka_othello/trainer_17/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65d26411e41f83acc9bab8d76205b9a88e48d1d5a15760f13daa826161e42a04
3
+ size 16797965
matryoshka_othello/trainer_17/config.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "MatryoshkaBatchTopKTrainer",
4
+ "dict_class": "MatryoshkaBatchTopKSAE",
5
+ "lr": 0.0004,
6
+ "steps": 36621,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": null,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 256,
13
+ "seed": 42,
14
+ "activation_dim": 512,
15
+ "dict_size": 4096,
16
+ "group_fractions": [
17
+ 0.03125,
18
+ 0.0625,
19
+ 0.125,
20
+ 0.25,
21
+ 0.53125
22
+ ],
23
+ "group_weights": [
24
+ 0.2,
25
+ 0.2,
26
+ 0.2,
27
+ 0.2,
28
+ 0.2
29
+ ],
30
+ "group_sizes": [
31
+ 128,
32
+ 256,
33
+ 512,
34
+ 1024,
35
+ 2176
36
+ ],
37
+ "k": 358,
38
+ "device": "cuda:0",
39
+ "layer": 5,
40
+ "lm_name": "Baidicoot/Othello-GPT-Transformer-Lens",
41
+ "wandb_name": "MatryoshkaBatchTopKTrainer-othello-17_trainer_17",
42
+ "submodule_name": null
43
+ },
44
+ "buffer": {
45
+ "d_submodule": 512,
46
+ "io": "out",
47
+ "n_ctxs": 1000.0,
48
+ "ctx_len": 59,
49
+ "refresh_batch_size": 64,
50
+ "out_batch_size": 8192,
51
+ "device": "cuda:0"
52
+ }
53
+ }
matryoshka_othello/trainer_17/results.csv ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ ,autoencoder_group_path,autoencoder_path,reconstruction_file,trainer_class,sae_class,eval_sae_n_inputs,eval_results_n_inputs,board_reconstruction_n_inputs,l0,l1_loss,l2_loss,frac_alive,frac_variance_explained,cossim,l2_ratio,loss_original,loss_reconstructed,loss_zero,frac_recovered,num_alive_features,games_batch_to_state_stack_mine_yours_BLRRC_board_reconstruction_board_count,games_batch_to_state_stack_mine_yours_BLRRC_num_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_idx,games_batch_to_state_stack_mine_yours_BLRRC_zero_L0,games_batch_to_state_stack_mine_yours_BLRRC_best_L0,games_batch_to_state_stack_mine_yours_BLRRC_zero_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_best_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_last_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_true_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_true_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_false_negative_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_false_negative_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_multiple_classes,games_batch_to_state_stack_mine_yours_BLRRC_best_multiple_classes,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_high_precision_counts_per_T,games_batch_to_state_stack_mine_yours_BLRRC_high_precision_and_recall_counts_per_T,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_board_reconstruction_board_count,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_num_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_idx,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_L0,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_L0,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_last_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_true_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_true_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_false_negative_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_false_negative_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_multiple_classes,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_multiple_classes,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_high_precision_counts_per_T,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_high_precision_and_recall_counts_per_T,games_batch_to_valid_moves_BLRRC_board_reconstruction_board_count,games_batch_to_valid_moves_BLRRC_num_squares,games_batch_to_valid_moves_BLRRC_best_idx,games_batch_to_valid_moves_BLRRC_zero_L0,games_batch_to_valid_moves_BLRRC_best_L0,games_batch_to_valid_moves_BLRRC_zero_f1_score_per_class,games_batch_to_valid_moves_BLRRC_best_f1_score_per_class,games_batch_to_valid_moves_BLRRC_last_f1_score_per_class,games_batch_to_valid_moves_BLRRC_zero_num_true_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_true_positive_squares,games_batch_to_valid_moves_BLRRC_zero_num_false_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_false_positive_squares,games_batch_to_valid_moves_BLRRC_zero_num_false_negative_squares,games_batch_to_valid_moves_BLRRC_best_num_false_negative_squares,games_batch_to_valid_moves_BLRRC_zero_multiple_classes,games_batch_to_valid_moves_BLRRC_best_multiple_classes,games_batch_to_valid_moves_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_valid_moves_BLRRC_high_precision_counts_per_T,games_batch_to_valid_moves_BLRRC_high_precision_and_recall_counts_per_T
2
+ 0,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_17/,autoencoders/matryoshka_othello/trainer_17/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,372.8800048828125,926.5501098632812,2.116051197052002,0.1455078125,0.991376519203186,0.9956154227256775,0.9965057969093323,2.054067611694336,2.0541765689849854,4.913147449493408,0.9999619126319885,1320,59000,3776000,0,358.14251708984375,358.14251708984375,0.33243685960769653,0.33243685960769653,0.40639546513557434,757789,757789,25206,25206,3018211,3018211,6385,6385,6334,6334,"tensor([24675, 24682, 25349, 27123, 28309, 28956, 27821, 25905, 22054, 15903,
3
+ 4849], device='cuda:0')","tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], device='cuda:0')",59000,2006000,1,358.14251708984375,309.5303039550781,0.08881276100873947,0.08891495317220688,0.11463570594787598,93286,93398,1448,1441,1912714,1912602,480,479,480,479,"tensor([1862, 1865, 1998, 2185, 2256, 2289, 2190, 1989, 1725, 1286, 464],
4
+ device='cuda:0')","tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], device='cuda:0')",59000,506839,3,358.14251708984375,120.12174224853516,0.07169979065656662,0.08049111813306808,0.11640919744968414,18864,21267,491,325,487975,485572,0,0,0,0,"tensor([404, 404, 491, 581, 655, 704, 700, 696, 716, 634, 287],
5
+ device='cuda:0')","tensor([20, 20, 28, 36, 40, 43, 40, 45, 44, 40, 12], device='cuda:0')"
matryoshka_othello/trainer_18/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92861fab268ac2c942375ecf470d5fd0b1c1b758ea13d59b3e2cc98904516e1c
3
+ size 16797965
matryoshka_othello/trainer_18/config.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "MatryoshkaBatchTopKTrainer",
4
+ "dict_class": "MatryoshkaBatchTopKSAE",
5
+ "lr": 0.0004,
6
+ "steps": 36621,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": null,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 256,
13
+ "seed": 42,
14
+ "activation_dim": 512,
15
+ "dict_size": 4096,
16
+ "group_fractions": [
17
+ 0.03125,
18
+ 0.0625,
19
+ 0.125,
20
+ 0.25,
21
+ 0.53125
22
+ ],
23
+ "group_weights": [
24
+ 0.2,
25
+ 0.2,
26
+ 0.2,
27
+ 0.2,
28
+ 0.2
29
+ ],
30
+ "group_sizes": [
31
+ 128,
32
+ 256,
33
+ 512,
34
+ 1024,
35
+ 2176
36
+ ],
37
+ "k": 379,
38
+ "device": "cuda:0",
39
+ "layer": 5,
40
+ "lm_name": "Baidicoot/Othello-GPT-Transformer-Lens",
41
+ "wandb_name": "MatryoshkaBatchTopKTrainer-othello-18_trainer_18",
42
+ "submodule_name": null
43
+ },
44
+ "buffer": {
45
+ "d_submodule": 512,
46
+ "io": "out",
47
+ "n_ctxs": 1000.0,
48
+ "ctx_len": 59,
49
+ "refresh_batch_size": 64,
50
+ "out_batch_size": 8192,
51
+ "device": "cuda:0"
52
+ }
53
+ }
matryoshka_othello/trainer_18/results.csv ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ ,autoencoder_group_path,autoencoder_path,reconstruction_file,trainer_class,sae_class,eval_sae_n_inputs,eval_results_n_inputs,board_reconstruction_n_inputs,l0,l1_loss,l2_loss,frac_alive,frac_variance_explained,cossim,l2_ratio,loss_original,loss_reconstructed,loss_zero,frac_recovered,num_alive_features,games_batch_to_state_stack_mine_yours_BLRRC_board_reconstruction_board_count,games_batch_to_state_stack_mine_yours_BLRRC_num_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_idx,games_batch_to_state_stack_mine_yours_BLRRC_zero_L0,games_batch_to_state_stack_mine_yours_BLRRC_best_L0,games_batch_to_state_stack_mine_yours_BLRRC_zero_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_best_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_last_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_true_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_true_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_false_negative_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_false_negative_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_multiple_classes,games_batch_to_state_stack_mine_yours_BLRRC_best_multiple_classes,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_high_precision_counts_per_T,games_batch_to_state_stack_mine_yours_BLRRC_high_precision_and_recall_counts_per_T,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_board_reconstruction_board_count,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_num_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_idx,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_L0,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_L0,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_last_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_true_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_true_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_false_negative_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_false_negative_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_multiple_classes,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_multiple_classes,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_high_precision_counts_per_T,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_high_precision_and_recall_counts_per_T,games_batch_to_valid_moves_BLRRC_board_reconstruction_board_count,games_batch_to_valid_moves_BLRRC_num_squares,games_batch_to_valid_moves_BLRRC_best_idx,games_batch_to_valid_moves_BLRRC_zero_L0,games_batch_to_valid_moves_BLRRC_best_L0,games_batch_to_valid_moves_BLRRC_zero_f1_score_per_class,games_batch_to_valid_moves_BLRRC_best_f1_score_per_class,games_batch_to_valid_moves_BLRRC_last_f1_score_per_class,games_batch_to_valid_moves_BLRRC_zero_num_true_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_true_positive_squares,games_batch_to_valid_moves_BLRRC_zero_num_false_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_false_positive_squares,games_batch_to_valid_moves_BLRRC_zero_num_false_negative_squares,games_batch_to_valid_moves_BLRRC_best_num_false_negative_squares,games_batch_to_valid_moves_BLRRC_zero_multiple_classes,games_batch_to_valid_moves_BLRRC_best_multiple_classes,games_batch_to_valid_moves_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_valid_moves_BLRRC_high_precision_counts_per_T,games_batch_to_valid_moves_BLRRC_high_precision_and_recall_counts_per_T
2
+ 0,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_18/,autoencoders/matryoshka_othello/trainer_18/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,393.3599853515625,977.3129272460938,1.862903356552124,0.138427734375,0.9928845167160034,0.9965375065803528,0.9965564012527466,2.054067611694336,2.054072141647339,4.913147449493408,0.9999983906745911,1245,59000,3776000,0,379.510009765625,379.510009765625,0.3191245496273041,0.3191245496273041,0.38819584250450134,721154,721154,22423,22423,3054846,3054846,5616,5616,5563,5563,"tensor([23867, 23885, 24359, 25717, 27450, 27677, 27217, 24588, 19811, 14830,
3
+ 5344], device='cuda:0')","tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], device='cuda:0')",59000,2006000,1,379.510009765625,330.9790344238281,0.07927726209163666,0.07931189984083176,0.09983473271131516,82855,82892,1404,1387,1923145,1923108,437,434,437,434,"tensor([1767, 1771, 1874, 2008, 2153, 2137, 2108, 1885, 1537, 1192, 498],
4
+ device='cuda:0')","tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], device='cuda:0')",59000,506839,4,379.510009765625,69.05247497558594,0.07098888605833054,0.07606963813304901,0.10719474405050278,18671,20048,516,209,488168,486791,0,0,0,0,"tensor([428, 430, 506, 581, 667, 689, 705, 694, 630, 585, 314],
5
+ device='cuda:0')","tensor([27, 27, 29, 40, 43, 46, 48, 49, 40, 38, 17], device='cuda:0')"
matryoshka_othello/trainer_19/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c330ceaf38267f37ddad668fafc7e3823e09dc8c70735215f49f7066fcc6156
3
+ size 16797965
matryoshka_othello/trainer_19/config.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "MatryoshkaBatchTopKTrainer",
4
+ "dict_class": "MatryoshkaBatchTopKSAE",
5
+ "lr": 0.0004,
6
+ "steps": 36621,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": null,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 256,
13
+ "seed": 42,
14
+ "activation_dim": 512,
15
+ "dict_size": 4096,
16
+ "group_fractions": [
17
+ 0.03125,
18
+ 0.0625,
19
+ 0.125,
20
+ 0.25,
21
+ 0.53125
22
+ ],
23
+ "group_weights": [
24
+ 0.2,
25
+ 0.2,
26
+ 0.2,
27
+ 0.2,
28
+ 0.2
29
+ ],
30
+ "group_sizes": [
31
+ 128,
32
+ 256,
33
+ 512,
34
+ 1024,
35
+ 2176
36
+ ],
37
+ "k": 400,
38
+ "device": "cuda:0",
39
+ "layer": 5,
40
+ "lm_name": "Baidicoot/Othello-GPT-Transformer-Lens",
41
+ "wandb_name": "MatryoshkaBatchTopKTrainer-othello-19_trainer_19",
42
+ "submodule_name": null
43
+ },
44
+ "buffer": {
45
+ "d_submodule": 512,
46
+ "io": "out",
47
+ "n_ctxs": 1000.0,
48
+ "ctx_len": 59,
49
+ "refresh_batch_size": 64,
50
+ "out_batch_size": 8192,
51
+ "device": "cuda:0"
52
+ }
53
+ }
matryoshka_othello/trainer_19/results.csv ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ ,autoencoder_group_path,autoencoder_path,reconstruction_file,trainer_class,sae_class,eval_sae_n_inputs,eval_results_n_inputs,board_reconstruction_n_inputs,l0,l1_loss,l2_loss,frac_alive,frac_variance_explained,cossim,l2_ratio,loss_original,loss_reconstructed,loss_zero,frac_recovered,num_alive_features,games_batch_to_state_stack_mine_yours_BLRRC_board_reconstruction_board_count,games_batch_to_state_stack_mine_yours_BLRRC_num_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_idx,games_batch_to_state_stack_mine_yours_BLRRC_zero_L0,games_batch_to_state_stack_mine_yours_BLRRC_best_L0,games_batch_to_state_stack_mine_yours_BLRRC_zero_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_best_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_last_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_true_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_true_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_false_negative_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_false_negative_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_multiple_classes,games_batch_to_state_stack_mine_yours_BLRRC_best_multiple_classes,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_high_precision_counts_per_T,games_batch_to_state_stack_mine_yours_BLRRC_high_precision_and_recall_counts_per_T,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_board_reconstruction_board_count,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_num_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_idx,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_L0,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_L0,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_last_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_true_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_true_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_false_negative_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_false_negative_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_multiple_classes,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_multiple_classes,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_high_precision_counts_per_T,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_high_precision_and_recall_counts_per_T,games_batch_to_valid_moves_BLRRC_board_reconstruction_board_count,games_batch_to_valid_moves_BLRRC_num_squares,games_batch_to_valid_moves_BLRRC_best_idx,games_batch_to_valid_moves_BLRRC_zero_L0,games_batch_to_valid_moves_BLRRC_best_L0,games_batch_to_valid_moves_BLRRC_zero_f1_score_per_class,games_batch_to_valid_moves_BLRRC_best_f1_score_per_class,games_batch_to_valid_moves_BLRRC_last_f1_score_per_class,games_batch_to_valid_moves_BLRRC_zero_num_true_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_true_positive_squares,games_batch_to_valid_moves_BLRRC_zero_num_false_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_false_positive_squares,games_batch_to_valid_moves_BLRRC_zero_num_false_negative_squares,games_batch_to_valid_moves_BLRRC_best_num_false_negative_squares,games_batch_to_valid_moves_BLRRC_zero_multiple_classes,games_batch_to_valid_moves_BLRRC_best_multiple_classes,games_batch_to_valid_moves_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_valid_moves_BLRRC_high_precision_counts_per_T,games_batch_to_valid_moves_BLRRC_high_precision_and_recall_counts_per_T
2
+ 0,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_19/,autoencoders/matryoshka_othello/trainer_19/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,414.91998291015625,1019.5823364257812,1.6936144828796387,0.13623046875,0.9943257570266724,0.9971392750740051,0.9965102076530457,2.054067611694336,2.054236650466919,4.913147449493408,0.9999408721923828,1189,59000,3776000,0,400.29974365234375,400.29974365234375,0.2922493517398834,0.2922493517398834,0.35929182171821594,649253,649253,17891,17891,3126747,3126747,3816,3816,3793,3793,"tensor([23418, 23460, 23478, 24578, 25433, 25167, 23335, 20836, 17225, 13387,
3
+ 4693], device='cuda:0')","tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], device='cuda:0')",59000,2006000,0,400.29974365234375,400.29974365234375,0.06231624260544777,0.06231624260544777,0.0808260515332222,64552,64552,1203,1203,1941448,1941448,333,333,333,333,"tensor([1598, 1606, 1680, 1810, 1907, 1899, 1773, 1593, 1326, 1064, 440],
4
+ device='cuda:0')","tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], device='cuda:0')",59000,506839,4,400.29974365234375,75.95011138916016,0.055579621344804764,0.06591691821813583,0.09253963828086853,14500,17282,435,236,492339,489557,0,0,0,0,"tensor([387, 392, 451, 551, 630, 642, 649, 645, 612, 538, 288],
5
+ device='cuda:0')","tensor([25, 26, 27, 35, 41, 43, 42, 41, 40, 36, 15], device='cuda:0')"
matryoshka_othello/trainer_2/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22c320e76bbb74c7898084933d28bec62dc0c2bfe9655e36add446b1599d8457
3
+ size 16797965
matryoshka_othello/trainer_2/config.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "MatryoshkaBatchTopKTrainer",
4
+ "dict_class": "MatryoshkaBatchTopKSAE",
5
+ "lr": 0.0004,
6
+ "steps": 36621,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": null,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 256,
13
+ "seed": 42,
14
+ "activation_dim": 512,
15
+ "dict_size": 4096,
16
+ "group_fractions": [
17
+ 0.03125,
18
+ 0.0625,
19
+ 0.125,
20
+ 0.25,
21
+ 0.53125
22
+ ],
23
+ "group_weights": [
24
+ 0.2,
25
+ 0.2,
26
+ 0.2,
27
+ 0.2,
28
+ 0.2
29
+ ],
30
+ "group_sizes": [
31
+ 128,
32
+ 256,
33
+ 512,
34
+ 1024,
35
+ 2176
36
+ ],
37
+ "k": 51,
38
+ "device": "cuda:0",
39
+ "layer": 5,
40
+ "lm_name": "Baidicoot/Othello-GPT-Transformer-Lens",
41
+ "wandb_name": "MatryoshkaBatchTopKTrainer-othello-2_trainer_2",
42
+ "submodule_name": null
43
+ },
44
+ "buffer": {
45
+ "d_submodule": 512,
46
+ "io": "out",
47
+ "n_ctxs": 1000.0,
48
+ "ctx_len": 59,
49
+ "refresh_batch_size": 64,
50
+ "out_batch_size": 8192,
51
+ "device": "cuda:0"
52
+ }
53
+ }
matryoshka_othello/trainer_2/results.csv ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ ,autoencoder_group_path,autoencoder_path,reconstruction_file,trainer_class,sae_class,eval_sae_n_inputs,eval_results_n_inputs,board_reconstruction_n_inputs,l0,l1_loss,l2_loss,frac_alive,frac_variance_explained,cossim,l2_ratio,loss_original,loss_reconstructed,loss_zero,frac_recovered,num_alive_features,games_batch_to_state_stack_mine_yours_BLRRC_board_reconstruction_board_count,games_batch_to_state_stack_mine_yours_BLRRC_num_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_idx,games_batch_to_state_stack_mine_yours_BLRRC_zero_L0,games_batch_to_state_stack_mine_yours_BLRRC_best_L0,games_batch_to_state_stack_mine_yours_BLRRC_zero_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_best_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_last_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_true_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_true_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_false_negative_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_false_negative_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_multiple_classes,games_batch_to_state_stack_mine_yours_BLRRC_best_multiple_classes,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_high_precision_counts_per_T,games_batch_to_state_stack_mine_yours_BLRRC_high_precision_and_recall_counts_per_T,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_board_reconstruction_board_count,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_num_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_idx,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_L0,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_L0,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_last_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_true_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_true_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_false_negative_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_false_negative_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_multiple_classes,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_multiple_classes,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_high_precision_counts_per_T,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_high_precision_and_recall_counts_per_T,games_batch_to_valid_moves_BLRRC_board_reconstruction_board_count,games_batch_to_valid_moves_BLRRC_num_squares,games_batch_to_valid_moves_BLRRC_best_idx,games_batch_to_valid_moves_BLRRC_zero_L0,games_batch_to_valid_moves_BLRRC_best_L0,games_batch_to_valid_moves_BLRRC_zero_f1_score_per_class,games_batch_to_valid_moves_BLRRC_best_f1_score_per_class,games_batch_to_valid_moves_BLRRC_last_f1_score_per_class,games_batch_to_valid_moves_BLRRC_zero_num_true_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_true_positive_squares,games_batch_to_valid_moves_BLRRC_zero_num_false_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_false_positive_squares,games_batch_to_valid_moves_BLRRC_zero_num_false_negative_squares,games_batch_to_valid_moves_BLRRC_best_num_false_negative_squares,games_batch_to_valid_moves_BLRRC_zero_multiple_classes,games_batch_to_valid_moves_BLRRC_best_multiple_classes,games_batch_to_valid_moves_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_valid_moves_BLRRC_high_precision_counts_per_T,games_batch_to_valid_moves_BLRRC_high_precision_and_recall_counts_per_T
2
+ 0,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_2/,autoencoders/matryoshka_othello/trainer_2/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,52.63999938964844,160.45501708984375,6.988826751708984,0.11962890625,0.8998516798019409,0.9507719278335571,0.9625293016433716,2.054067611694336,2.0564510822296143,4.913147449493408,0.9991663694381714,1541,59000,3776000,2,50.94383239746094,47.7843132019043,0.7862502336502075,0.793076753616333,0.8443553447723389,2494429,2524621,74699,66029,1281571,1251379,37813,33972,37368,33599,"tensor([ 7562, 7619, 7944, 8424, 9375, 10583, 12019, 13344, 14409, 11636,
3
+ 2879], device='cuda:0')","tensor([43, 43, 43, 44, 33, 22, 13, 8, 0, 0, 0], device='cuda:0')",59000,2006000,3,50.94383239746094,42.56414031982422,0.7287904024124146,0.7426672577857971,0.7913922667503357,1171567,1195516,37533,18004,834433,810484,13787,4853,13776,4844,"tensor([1832, 1832, 1860, 1976, 2229, 2494, 2753, 2872, 2661, 1522, 262],
4
+ device='cuda:0')","tensor([32, 32, 32, 33, 22, 11, 4, 0, 0, 0, 0], device='cuda:0')",59000,506839,3,50.94383239746094,42.56414031982422,0.3663000166416168,0.5692099332809448,0.7786582708358765,114553,202610,4068,2450,392286,304229,0,0,0,0,"tensor([142, 142, 159, 185, 230, 280, 339, 371, 423, 424, 170],
5
+ device='cuda:0')","tensor([24, 24, 37, 41, 36, 29, 21, 19, 22, 23, 6], device='cuda:0')"
matryoshka_othello/trainer_20/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9ef2822285b88f743a1e71601667a729380d277419b3b1e6a6be6b1c6c1d9ab
3
+ size 33591565
matryoshka_othello/trainer_20/config.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "MatryoshkaBatchTopKTrainer",
4
+ "dict_class": "MatryoshkaBatchTopKSAE",
5
+ "lr": 0.000282842712474619,
6
+ "steps": 36621,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": null,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 256,
13
+ "seed": 42,
14
+ "activation_dim": 512,
15
+ "dict_size": 8192,
16
+ "group_fractions": [
17
+ 0.03125,
18
+ 0.0625,
19
+ 0.125,
20
+ 0.25,
21
+ 0.53125
22
+ ],
23
+ "group_weights": [
24
+ 0.2,
25
+ 0.2,
26
+ 0.2,
27
+ 0.2,
28
+ 0.2
29
+ ],
30
+ "group_sizes": [
31
+ 256,
32
+ 512,
33
+ 1024,
34
+ 2048,
35
+ 4352
36
+ ],
37
+ "k": 10,
38
+ "device": "cuda:0",
39
+ "layer": 5,
40
+ "lm_name": "Baidicoot/Othello-GPT-Transformer-Lens",
41
+ "wandb_name": "MatryoshkaBatchTopKTrainer-othello-20_trainer_20",
42
+ "submodule_name": null
43
+ },
44
+ "buffer": {
45
+ "d_submodule": 512,
46
+ "io": "out",
47
+ "n_ctxs": 1000.0,
48
+ "ctx_len": 59,
49
+ "refresh_batch_size": 64,
50
+ "out_batch_size": 8192,
51
+ "device": "cuda:0"
52
+ }
53
+ }
matryoshka_othello/trainer_20/results.csv ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ ,autoencoder_group_path,autoencoder_path,reconstruction_file,trainer_class,sae_class,eval_sae_n_inputs,eval_results_n_inputs,board_reconstruction_n_inputs,l0,l1_loss,l2_loss,frac_alive,frac_variance_explained,cossim,l2_ratio,loss_original,loss_reconstructed,loss_zero,frac_recovered,num_alive_features,games_batch_to_state_stack_mine_yours_BLRRC_board_reconstruction_board_count,games_batch_to_state_stack_mine_yours_BLRRC_num_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_idx,games_batch_to_state_stack_mine_yours_BLRRC_zero_L0,games_batch_to_state_stack_mine_yours_BLRRC_best_L0,games_batch_to_state_stack_mine_yours_BLRRC_zero_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_best_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_last_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_true_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_true_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_false_negative_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_false_negative_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_multiple_classes,games_batch_to_state_stack_mine_yours_BLRRC_best_multiple_classes,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_high_precision_counts_per_T,games_batch_to_state_stack_mine_yours_BLRRC_high_precision_and_recall_counts_per_T,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_board_reconstruction_board_count,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_num_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_idx,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_L0,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_L0,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_last_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_true_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_true_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_false_negative_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_false_negative_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_multiple_classes,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_multiple_classes,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_high_precision_counts_per_T,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_high_precision_and_recall_counts_per_T,games_batch_to_valid_moves_BLRRC_board_reconstruction_board_count,games_batch_to_valid_moves_BLRRC_num_squares,games_batch_to_valid_moves_BLRRC_best_idx,games_batch_to_valid_moves_BLRRC_zero_L0,games_batch_to_valid_moves_BLRRC_best_L0,games_batch_to_valid_moves_BLRRC_zero_f1_score_per_class,games_batch_to_valid_moves_BLRRC_best_f1_score_per_class,games_batch_to_valid_moves_BLRRC_last_f1_score_per_class,games_batch_to_valid_moves_BLRRC_zero_num_true_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_true_positive_squares,games_batch_to_valid_moves_BLRRC_zero_num_false_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_false_positive_squares,games_batch_to_valid_moves_BLRRC_zero_num_false_negative_squares,games_batch_to_valid_moves_BLRRC_best_num_false_negative_squares,games_batch_to_valid_moves_BLRRC_zero_multiple_classes,games_batch_to_valid_moves_BLRRC_best_multiple_classes,games_batch_to_valid_moves_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_valid_moves_BLRRC_high_precision_counts_per_T,games_batch_to_valid_moves_BLRRC_high_precision_and_recall_counts_per_T
2
+ 0,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_20/,autoencoders/matryoshka_othello/trainer_20/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,9.4399995803833,51.98342514038086,12.217387199401855,0.0245361328125,0.6901394128799438,0.8368940353393555,0.849263608455658,2.054067611694336,2.5678534507751465,4.913147449493408,0.8202967643737793,2534,59000,3776000,0,9.985015869140625,9.985015869140625,0.6314403414726257,0.6314403414726257,0.6913968324661255,1761268,1761268,41305,41305,2014732,2014732,13177,13177,13095,13095,"tensor([19011, 19011, 19011, 19017, 19067, 19434, 20006, 20472, 20391, 15369,
3
+ 2182], device='cuda:0')","tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], device='cuda:0')",59000,2006000,0,9.985015869140625,9.985015869140625,0.5106327533721924,0.5106327533721924,0.5424720048904419,691565,691565,11094,11094,1314435,1314435,3143,3143,3140,3140,"tensor([6277, 6277, 6277, 6277, 6292, 6382, 6465, 6480, 5819, 3145, 213],
4
+ device='cuda:0')","tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], device='cuda:0')",59000,506839,0,9.985015869140625,9.985015869140625,0.5258867740631104,0.5258867740631104,0.605864942073822,181549,181549,2061,2061,325290,325290,0,0,0,0,"tensor([590, 590, 590, 590, 595, 619, 688, 784, 860, 742, 118],
5
+ device='cuda:0')","tensor([32, 32, 32, 32, 33, 35, 36, 34, 34, 25, 6], device='cuda:0')"
matryoshka_othello/trainer_21/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1f6290a22af659f335b6b4c20f76904fe41906ce8adcd57ceabf846e524713b
3
+ size 33591565
matryoshka_othello/trainer_21/config.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "MatryoshkaBatchTopKTrainer",
4
+ "dict_class": "MatryoshkaBatchTopKSAE",
5
+ "lr": 0.000282842712474619,
6
+ "steps": 36621,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": null,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 256,
13
+ "seed": 42,
14
+ "activation_dim": 512,
15
+ "dict_size": 8192,
16
+ "group_fractions": [
17
+ 0.03125,
18
+ 0.0625,
19
+ 0.125,
20
+ 0.25,
21
+ 0.53125
22
+ ],
23
+ "group_weights": [
24
+ 0.2,
25
+ 0.2,
26
+ 0.2,
27
+ 0.2,
28
+ 0.2
29
+ ],
30
+ "group_sizes": [
31
+ 256,
32
+ 512,
33
+ 1024,
34
+ 2048,
35
+ 4352
36
+ ],
37
+ "k": 30,
38
+ "device": "cuda:0",
39
+ "layer": 5,
40
+ "lm_name": "Baidicoot/Othello-GPT-Transformer-Lens",
41
+ "wandb_name": "MatryoshkaBatchTopKTrainer-othello-21_trainer_21",
42
+ "submodule_name": null
43
+ },
44
+ "buffer": {
45
+ "d_submodule": 512,
46
+ "io": "out",
47
+ "n_ctxs": 1000.0,
48
+ "ctx_len": 59,
49
+ "refresh_batch_size": 64,
50
+ "out_batch_size": 8192,
51
+ "device": "cuda:0"
52
+ }
53
+ }
matryoshka_othello/trainer_21/results.csv ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ ,autoencoder_group_path,autoencoder_path,reconstruction_file,trainer_class,sae_class,eval_sae_n_inputs,eval_results_n_inputs,board_reconstruction_n_inputs,l0,l1_loss,l2_loss,frac_alive,frac_variance_explained,cossim,l2_ratio,loss_original,loss_reconstructed,loss_zero,frac_recovered,num_alive_features,games_batch_to_state_stack_mine_yours_BLRRC_board_reconstruction_board_count,games_batch_to_state_stack_mine_yours_BLRRC_num_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_idx,games_batch_to_state_stack_mine_yours_BLRRC_zero_L0,games_batch_to_state_stack_mine_yours_BLRRC_best_L0,games_batch_to_state_stack_mine_yours_BLRRC_zero_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_best_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_last_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_true_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_true_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_false_negative_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_false_negative_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_multiple_classes,games_batch_to_state_stack_mine_yours_BLRRC_best_multiple_classes,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_high_precision_counts_per_T,games_batch_to_state_stack_mine_yours_BLRRC_high_precision_and_recall_counts_per_T,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_board_reconstruction_board_count,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_num_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_idx,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_L0,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_L0,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_last_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_true_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_true_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_false_negative_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_false_negative_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_multiple_classes,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_multiple_classes,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_high_precision_counts_per_T,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_high_precision_and_recall_counts_per_T,games_batch_to_valid_moves_BLRRC_board_reconstruction_board_count,games_batch_to_valid_moves_BLRRC_num_squares,games_batch_to_valid_moves_BLRRC_best_idx,games_batch_to_valid_moves_BLRRC_zero_L0,games_batch_to_valid_moves_BLRRC_best_L0,games_batch_to_valid_moves_BLRRC_zero_f1_score_per_class,games_batch_to_valid_moves_BLRRC_best_f1_score_per_class,games_batch_to_valid_moves_BLRRC_last_f1_score_per_class,games_batch_to_valid_moves_BLRRC_zero_num_true_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_true_positive_squares,games_batch_to_valid_moves_BLRRC_zero_num_false_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_false_positive_squares,games_batch_to_valid_moves_BLRRC_zero_num_false_negative_squares,games_batch_to_valid_moves_BLRRC_best_num_false_negative_squares,games_batch_to_valid_moves_BLRRC_zero_multiple_classes,games_batch_to_valid_moves_BLRRC_best_multiple_classes,games_batch_to_valid_moves_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_valid_moves_BLRRC_high_precision_counts_per_T,games_batch_to_valid_moves_BLRRC_high_precision_and_recall_counts_per_T
2
+ 0,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_21/,autoencoders/matryoshka_othello/trainer_21/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,30.35999870300293,112.67391204833984,8.406524658203125,0.048095703125,0.8532488942146301,0.9266902208328247,0.9364179968833923,2.054067611694336,2.0598807334899902,4.913147449493408,0.9979667663574219,2351,59000,3776000,0,29.9969482421875,29.9969482421875,0.7910191416740417,0.7910191416740417,0.8393763899803162,2507967,2507967,57136,57136,1268033,1268033,27002,27002,26744,26744,"tensor([15944, 15944, 15961, 16041, 16361, 17312, 18383, 19606, 19829, 15951,
3
+ 3288], device='cuda:0')","tensor([12, 12, 12, 12, 12, 11, 9, 4, 0, 0, 0], device='cuda:0')",59000,2006000,3,29.9969482421875,28.96919059753418,0.7310500144958496,0.7317075133323669,0.7711263298988342,1170955,1170450,26533,22779,835045,835550,8995,6968,8966,6950,"tensor([4046, 4046, 4051, 4102, 4279, 4608, 4913, 4982, 4449, 2518, 300],
4
+ device='cuda:0')","tensor([1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0], device='cuda:0')",59000,506839,0,29.9969482421875,29.9969482421875,0.6568450331687927,0.6568450331687927,0.8117663860321045,250144,250144,4670,4670,256695,256695,0,0,0,0,"tensor([304, 304, 305, 313, 336, 408, 471, 560, 624, 571, 189],
5
+ device='cuda:0')","tensor([60, 60, 60, 57, 51, 43, 30, 30, 37, 30, 7], device='cuda:0')"
matryoshka_othello/trainer_22/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee63f1f235e358412cbe005551a00e80ddd7ae7c21899d2a203e0b29df81e9c3
3
+ size 33591565
matryoshka_othello/trainer_22/config.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "MatryoshkaBatchTopKTrainer",
4
+ "dict_class": "MatryoshkaBatchTopKSAE",
5
+ "lr": 0.000282842712474619,
6
+ "steps": 36621,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": null,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 256,
13
+ "seed": 42,
14
+ "activation_dim": 512,
15
+ "dict_size": 8192,
16
+ "group_fractions": [
17
+ 0.03125,
18
+ 0.0625,
19
+ 0.125,
20
+ 0.25,
21
+ 0.53125
22
+ ],
23
+ "group_weights": [
24
+ 0.2,
25
+ 0.2,
26
+ 0.2,
27
+ 0.2,
28
+ 0.2
29
+ ],
30
+ "group_sizes": [
31
+ 256,
32
+ 512,
33
+ 1024,
34
+ 2048,
35
+ 4352
36
+ ],
37
+ "k": 51,
38
+ "device": "cuda:0",
39
+ "layer": 5,
40
+ "lm_name": "Baidicoot/Othello-GPT-Transformer-Lens",
41
+ "wandb_name": "MatryoshkaBatchTopKTrainer-othello-22_trainer_22",
42
+ "submodule_name": null
43
+ },
44
+ "buffer": {
45
+ "d_submodule": 512,
46
+ "io": "out",
47
+ "n_ctxs": 1000.0,
48
+ "ctx_len": 59,
49
+ "refresh_batch_size": 64,
50
+ "out_batch_size": 8192,
51
+ "device": "cuda:0"
52
+ }
53
+ }
matryoshka_othello/trainer_22/results.csv ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ ,autoencoder_group_path,autoencoder_path,reconstruction_file,trainer_class,sae_class,eval_sae_n_inputs,eval_results_n_inputs,board_reconstruction_n_inputs,l0,l1_loss,l2_loss,frac_alive,frac_variance_explained,cossim,l2_ratio,loss_original,loss_reconstructed,loss_zero,frac_recovered,num_alive_features,games_batch_to_state_stack_mine_yours_BLRRC_board_reconstruction_board_count,games_batch_to_state_stack_mine_yours_BLRRC_num_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_idx,games_batch_to_state_stack_mine_yours_BLRRC_zero_L0,games_batch_to_state_stack_mine_yours_BLRRC_best_L0,games_batch_to_state_stack_mine_yours_BLRRC_zero_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_best_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_last_f1_score_per_class,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_true_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_true_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_false_negative_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_false_negative_squares,games_batch_to_state_stack_mine_yours_BLRRC_zero_multiple_classes,games_batch_to_state_stack_mine_yours_BLRRC_best_multiple_classes,games_batch_to_state_stack_mine_yours_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_BLRRC_high_precision_counts_per_T,games_batch_to_state_stack_mine_yours_BLRRC_high_precision_and_recall_counts_per_T,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_board_reconstruction_board_count,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_num_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_idx,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_L0,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_L0,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_last_f1_score_per_class,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_true_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_true_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_false_negative_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_false_negative_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_multiple_classes,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_multiple_classes,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_high_precision_counts_per_T,games_batch_to_state_stack_mine_yours_blank_mask_BLRRC_high_precision_and_recall_counts_per_T,games_batch_to_valid_moves_BLRRC_board_reconstruction_board_count,games_batch_to_valid_moves_BLRRC_num_squares,games_batch_to_valid_moves_BLRRC_best_idx,games_batch_to_valid_moves_BLRRC_zero_L0,games_batch_to_valid_moves_BLRRC_best_L0,games_batch_to_valid_moves_BLRRC_zero_f1_score_per_class,games_batch_to_valid_moves_BLRRC_best_f1_score_per_class,games_batch_to_valid_moves_BLRRC_last_f1_score_per_class,games_batch_to_valid_moves_BLRRC_zero_num_true_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_true_positive_squares,games_batch_to_valid_moves_BLRRC_zero_num_false_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_false_positive_squares,games_batch_to_valid_moves_BLRRC_zero_num_false_negative_squares,games_batch_to_valid_moves_BLRRC_best_num_false_negative_squares,games_batch_to_valid_moves_BLRRC_zero_multiple_classes,games_batch_to_valid_moves_BLRRC_best_multiple_classes,games_batch_to_valid_moves_BLRRC_zero_num_true_and_false_positive_squares,games_batch_to_valid_moves_BLRRC_best_num_true_and_false_positive_squares,games_batch_to_valid_moves_BLRRC_high_precision_counts_per_T,games_batch_to_valid_moves_BLRRC_high_precision_and_recall_counts_per_T
2
+ 0,autoencoders/matryoshka_othello/,autoencoders/matryoshka_othello/trainer_22/,autoencoders/matryoshka_othello/trainer_22/indexing_None_n_inputs_1000_reconstruction.pkl,MatryoshkaBatchTopKTrainer,MatryoshkaBatchTopKSAE,1000,1000,1000,52.39999771118164,159.36683654785156,6.458910942077637,0.066650390625,0.9137061238288879,0.9574987292289734,0.9641538858413696,2.054067611694336,2.0557403564453125,4.913147449493408,0.9994149208068848,2054,59000,3776000,3,51.004478454589844,44.02774429321289,0.8091054558753967,0.824268102645874,0.8692331314086914,2628443,2684743,92715,53504,1147557,1091257,45226,27252,44553,27036,"tensor([13967, 14131, 14287, 14664, 15669, 16894, 18398, 19805, 20386, 16492,
3
+ 3783], device='cuda:0')","tensor([35, 35, 36, 35, 29, 22, 11, 6, 0, 0, 0], device='cuda:0')",59000,2006000,3,51.004478454589844,44.02774429321289,0.7306276559829712,0.7722402215003967,0.8160722851753235,1178985,1274433,42336,20180,827015,731567,12846,5641,12823,5638,"tensor([2702, 2706, 2750, 2910, 3282, 3650, 3917, 4058, 3660, 2127, 350],
4
+ device='cuda:0')","tensor([22, 22, 23, 23, 17, 10, 1, 0, 0, 0, 0], device='cuda:0')",59000,506839,3,51.004478454589844,44.02774429321289,0.31626853346824646,0.6373100876808167,0.8312321305274963,95759,238643,2957,3425,411080,268196,0,0,0,0,"tensor([217, 217, 245, 279, 345, 419, 472, 525, 579, 579, 205],
5
+ device='cuda:0')","tensor([29, 29, 52, 56, 46, 40, 32, 32, 34, 35, 6], device='cuda:0')"