carinnew committed
Commit 37d6368 · verified · 1 Parent(s): 8e99d2c

Upload folder using huggingface_hub

config.json ADDED
@@ -0,0 +1,31 @@
+ {
+   "_name_or_path": "/home/azureuser/autogenper/models/gpt2-english/gpt2-large-v1/checkpoint_0_405000",
+   "activation_function": "gelu_new",
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 50256,
+   "embd_pdrop": 0.1,
+   "eos_token_id": 50256,
+   "initializer_range": 0.02,
+   "layer_norm_epsilon": 1e-05,
+   "model_type": "gpt2",
+   "n_embd": 1280,
+   "n_head": 20,
+   "n_inner": null,
+   "n_layer": 36,
+   "n_positions": 1024,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "transformers_version": "4.36.0",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
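
These values describe the GPT-2 large geometry: 36 layers, 1280-dim embeddings, 20 attention heads, a 1024-token context, and a 50257-token vocabulary. A minimal sketch (an illustration, not part of the upload) that rebuilds the same shape locally and confirms the expected ~774M parameter count:

from transformers import GPT2Config, GPT2LMHeadModel

# Rebuild the architecture from the config values above; no download needed.
config = GPT2Config(
    n_embd=1280, n_head=20, n_layer=36, n_positions=1024, vocab_size=50257
)
model = GPT2LMHeadModel(config)  # randomly initialized, same shape as the upload
print(sum(p.numel() for p in model.parameters()))  # ~774M parameters (GPT-2 large)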
epoch_losses.json ADDED
@@ -0,0 +1 @@
+ {"train_log": [{"learning_rate": 2.9999999242136255e-05, "loss": 3.3136401176452637, "step": 500}, {"learning_rate": 2.9534117857110687e-05, "loss": 3.310513496398926, "step": 1000}, {"learning_rate": 2.9068236472085118e-05, "loss": 3.3584189414978027, "step": 1500}, {"learning_rate": 2.8602353268070146e-05, "loss": 3.324364423751831, "step": 2000}, {"learning_rate": 2.8136471883044578e-05, "loss": 3.312864303588867, "step": 2500}, {"learning_rate": 2.767059049801901e-05, "loss": 3.3853697776794434, "step": 3000}, {"learning_rate": 2.720470911299344e-05, "loss": 3.2357020378112793, "step": 3500}, {"learning_rate": 2.673882590897847e-05, "loss": 3.2864274978637695, "step": 4000}, {"learning_rate": 2.62729445239529e-05, "loss": 3.3316233158111572, "step": 4500}, {"learning_rate": 2.5807063138927333e-05, "loss": 3.2906289100646973, "step": 5000}, {"learning_rate": 2.5341181753901765e-05, "loss": 3.2855005264282227, "step": 5500}, {"learning_rate": 2.4875298549886793e-05, "loss": 3.2512764930725098, "step": 6000}, {"learning_rate": 2.4409417164861225e-05, "loss": 3.167151927947998, "step": 6500}, {"learning_rate": 2.3943535779835656e-05, "loss": 3.218813896179199, "step": 7000}, {"learning_rate": 2.3477654394810088e-05, "loss": 3.2747364044189453, "step": 7500}, {"learning_rate": 2.3011771190795116e-05, "loss": 3.220797300338745, "step": 8000}, {"learning_rate": 2.2545889805769548e-05, "loss": 3.218301296234131, "step": 8500}, {"learning_rate": 2.208000842074398e-05, "loss": 3.254472255706787, "step": 9000}, {"learning_rate": 2.161412703571841e-05, "loss": 3.173828363418579, "step": 9500}, {"learning_rate": 2.1148245650692843e-05, "loss": 3.3247807025909424, "step": 10000}, {"learning_rate": 2.0682360627688468e-05, "loss": 3.2380363941192627, "step": 10500}, {"learning_rate": 2.02164792426629e-05, "loss": 3.2388548851013184, "step": 11000}, {"learning_rate": 1.975059785763733e-05, "loss": 3.1178417205810547, "step": 11500}, {"learning_rate": 1.9284716472611763e-05, "loss": 3.178191661834717, "step": 12000}, {"learning_rate": 1.881883326859679e-05, "loss": 3.2063257694244385, "step": 12500}, {"learning_rate": 1.8352951883571222e-05, "loss": 3.2435309886932373, "step": 13000}, {"learning_rate": 1.7887070498545654e-05, "loss": 3.157283306121826, "step": 13500}, {"learning_rate": 1.7421189113520086e-05, "loss": 3.239788770675659, "step": 14000}, {"learning_rate": 1.6955305909505114e-05, "loss": 3.1216375827789307, "step": 14500}, {"learning_rate": 1.6489424524479546e-05, "loss": 3.192378282546997, "step": 15000}, {"learning_rate": 1.6023543139453977e-05, "loss": 3.2471261024475098, "step": 15500}, {"learning_rate": 1.555766175442841e-05, "loss": 3.197913646697998, "step": 16000}, {"learning_rate": 1.5091779459908139e-05, "loss": 3.294142961502075, "step": 16500}, {"learning_rate": 1.462589807488257e-05, "loss": 3.235691547393799, "step": 17000}, {"learning_rate": 1.41600157803623e-05, "loss": 3.185995578765869, "step": 17500}, {"learning_rate": 1.3694134395336732e-05, "loss": 3.242919445037842, "step": 18000}, {"learning_rate": 1.3228252100816462e-05, "loss": 3.300860643386841, "step": 18500}, {"learning_rate": 1.2762370715790894e-05, "loss": 3.2015135288238525, "step": 19000}, {"learning_rate": 1.2296488421270624e-05, "loss": 3.1737678050994873, "step": 19500}, {"learning_rate": 1.1830605217255652e-05, "loss": 3.217064142227173, "step": 20000}, {"learning_rate": 1.1364722922735382e-05, "loss": 3.172595262527466, "step": 20500}, {"learning_rate": 1.0898841537709814e-05, "loss": 
3.286386728286743, "step": 21000}, {"learning_rate": 1.0432959243189543e-05, "loss": 3.1802852153778076, "step": 21500}, {"learning_rate": 9.967077858163975e-06, "loss": 3.261125326156616, "step": 22000}, {"learning_rate": 9.501195563643705e-06, "loss": 3.20611572265625, "step": 22500}, {"learning_rate": 9.035314178618137e-06, "loss": 3.2354695796966553, "step": 23000}, {"learning_rate": 8.569431884097867e-06, "loss": 3.1656131744384766, "step": 23500}, {"learning_rate": 8.103550499072298e-06, "loss": 3.2892768383026123, "step": 24000}, {"learning_rate": 7.637668204552028e-06, "loss": 3.220771312713623, "step": 24500}, {"learning_rate": 7.17178681952646e-06, "loss": 3.1672675609588623, "step": 25000}, {"learning_rate": 6.705904979753541e-06, "loss": 3.1718244552612305, "step": 25500}, {"learning_rate": 6.240023139980622e-06, "loss": 3.195019006729126, "step": 26000}, {"learning_rate": 5.7741413002077024e-06, "loss": 3.2017009258270264, "step": 26500}, {"learning_rate": 5.308259460434783e-06, "loss": 3.2062506675720215, "step": 27000}, {"learning_rate": 4.842377620661864e-06, "loss": 3.1611135005950928, "step": 27500}, {"learning_rate": 4.376495780888945e-06, "loss": 3.2003793716430664, "step": 28000}, {"learning_rate": 3.910613941116026e-06, "loss": 3.2053701877593994, "step": 28500}, {"learning_rate": 3.444732328716782e-06, "loss": 3.1706700325012207, "step": 29000}, {"learning_rate": 2.9788504889438627e-06, "loss": 3.166384696960449, "step": 29500}, {"learning_rate": 2.5129686491709435e-06, "loss": 3.2092933654785156, "step": 30000}, {"learning_rate": 2.0470868093980243e-06, "loss": 3.271768569946289, "step": 30500}, {"learning_rate": 1.5812050833119429e-06, "loss": 3.20021390914917, "step": 31000}, {"learning_rate": 1.1153232435390237e-06, "loss": 3.2410552501678467, "step": 31500}, {"learning_rate": 6.494414606095233e-07, "loss": 3.2000813484191895, "step": 32000}], "eval_log": [{"perplexity": 25.468077473351187, "loss": 3.2374258041381836, "step": 500}, {"perplexity": 25.551455568478357, "loss": 3.240694284439087, "step": 1000}, {"perplexity": 25.419965444736942, "loss": 3.235534906387329, "step": 1500}, {"perplexity": 25.32171965478965, "loss": 3.2316625118255615, "step": 2000}, {"perplexity": 25.29025538612487, "loss": 3.230419158935547, "step": 2500}, {"perplexity": 25.185328327791904, "loss": 3.226261615753174, "step": 3000}, {"perplexity": 25.238928750056186, "loss": 3.2283875942230225, "step": 3500}, {"perplexity": 25.219548037044778, "loss": 3.2276194095611572, "step": 4000}, {"perplexity": 25.04909831558491, "loss": 3.2208378314971924, "step": 4500}, {"perplexity": 25.02766142960091, "loss": 3.2199816703796387, "step": 5000}, {"perplexity": 24.96767497899088, "loss": 3.2175819873809814, "step": 5500}, {"perplexity": 25.07210770532241, "loss": 3.2217559814453125, "step": 6000}, {"perplexity": 25.009104790338036, "loss": 3.2192399501800537, "step": 6500}, {"perplexity": 24.95307118446496, "loss": 3.216996908187866, "step": 7000}, {"perplexity": 24.81299154671458, "loss": 3.21136736869812, "step": 7500}, {"perplexity": 24.777988344374865, "loss": 3.2099556922912598, "step": 8000}, {"perplexity": 24.760832903332474, "loss": 3.2092630863189697, "step": 8500}, {"perplexity": 24.660018984685504, "loss": 3.205183267593384, "step": 9000}, {"perplexity": 24.598714306954328, "loss": 3.2026941776275635, "step": 9500}, {"perplexity": 24.54127134623571, "loss": 3.2003562450408936, "step": 10000}, {"perplexity": 24.56102088335186, "loss": 3.2011606693267822, "step": 10500}, {"perplexity": 
24.479053738627695, "loss": 3.197817802429199, "step": 11000}, {"perplexity": 24.438525310478198, "loss": 3.1961607933044434, "step": 11500}, {"perplexity": 24.37873609480629, "loss": 3.193711280822754, "step": 12000}, {"perplexity": 24.309261524537643, "loss": 3.190857410430908, "step": 12500}, {"perplexity": 24.269610194925303, "loss": 3.1892249584198, "step": 13000}, {"perplexity": 24.229544064459102, "loss": 3.187572717666626, "step": 13500}, {"perplexity": 24.156002037254957, "loss": 3.184532880783081, "step": 14000}, {"perplexity": 24.110897519855648, "loss": 3.182663917541504, "step": 14500}, {"perplexity": 24.040145694104016, "loss": 3.179725170135498, "step": 15000}, {"perplexity": 23.937614261789673, "loss": 3.1754510402679443, "step": 15500}, {"perplexity": 23.898278544325894, "loss": 3.1738064289093018, "step": 16000}, {"perplexity": 23.93487497598575, "loss": 3.1753365993499756, "step": 16500}, {"perplexity": 23.848838719228368, "loss": 3.1717355251312256, "step": 17000}, {"perplexity": 23.86254593189651, "loss": 3.1723101139068604, "step": 17500}, {"perplexity": 23.6973463460015, "loss": 3.165363073348999, "step": 18000}, {"perplexity": 23.718944403083444, "loss": 3.166274070739746, "step": 18500}, {"perplexity": 23.705410107331915, "loss": 3.165703296661377, "step": 19000}, {"perplexity": 23.70806095451715, "loss": 3.1658151149749756, "step": 19500}, {"perplexity": 23.654406601298422, "loss": 3.1635494232177734, "step": 20000}, {"perplexity": 23.595033688896933, "loss": 3.161036252975464, "step": 20500}, {"perplexity": 23.558378341798278, "loss": 3.1594815254211426, "step": 21000}, {"perplexity": 23.54907321677428, "loss": 3.1590864658355713, "step": 21500}, {"perplexity": 23.50716995420986, "loss": 3.1573054790496826, "step": 22000}, {"perplexity": 23.462997245246083, "loss": 3.1554245948791504, "step": 22500}, {"perplexity": 23.405789985812596, "loss": 3.1529834270477295, "step": 23000}, {"perplexity": 23.349223223182403, "loss": 3.1505637168884277, "step": 23500}, {"perplexity": 23.361618406152726, "loss": 3.151094436645508, "step": 24000}, {"perplexity": 23.288595152006764, "loss": 3.147963762283325, "step": 24500}, {"perplexity": 23.295731122455553, "loss": 3.1482701301574707, "step": 25000}, {"perplexity": 23.236444096571898, "loss": 3.145721912384033, "step": 25500}, {"perplexity": 23.18787035713028, "loss": 3.143629312515259, "step": 26000}, {"perplexity": 23.180800587070923, "loss": 3.143324375152588, "step": 26500}, {"perplexity": 23.19157469378757, "loss": 3.143789052963257, "step": 27000}, {"perplexity": 23.159935510479826, "loss": 3.1424238681793213, "step": 27500}, {"perplexity": 23.096990325473765, "loss": 3.139702320098877, "step": 28000}, {"perplexity": 23.074066088918205, "loss": 3.138709306716919, "step": 28500}, {"perplexity": 23.045856248620236, "loss": 3.137485980987549, "step": 29000}, {"perplexity": 23.053555413384874, "loss": 3.13782000541687, "step": 29500}, {"perplexity": 23.044587040141664, "loss": 3.1374309062957764, "step": 30000}, {"perplexity": 23.02538162833782, "loss": 3.136597156524658, "step": 30500}, {"perplexity": 22.99813601596418, "loss": 3.13541316986084, "step": 31000}, {"perplexity": 23.007009517330534, "loss": 3.135798931121826, "step": 31500}, {"perplexity": 22.984689700643717, "loss": 3.1348283290863037, "step": 32000}]}
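
In the log above, learning_rate decays roughly linearly from 3e-5 toward zero over 32,000 steps, and each eval entry reports a loss alongside its perplexity; the two are consistent with perplexity = exp(loss). A small sketch that verifies this from a local copy of the file (file name as uploaded):

import json, math

with open("epoch_losses.json") as f:
    logs = json.load(f)

# Each eval entry's perplexity should equal exp(loss) up to rounding.
for entry in logs["eval_log"]:
    assert math.isclose(entry["perplexity"], math.exp(entry["loss"]), rel_tol=1e-4)

# Eval loss falls from ~3.237 to ~3.135 (perplexity ~25.5 -> ~23.0).
last = logs["eval_log"][-1]
print(last["step"], last["loss"], last["perplexity"])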
flax_model.msgpack ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9c240505a3fceea184ff8d389ea26a28065948ae083f2137811288bb3e844ddb
+ size 3096134690
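
What the diff shows is only a Git LFS pointer: the repo records the SHA-256 and size (~3.1 GB) of flax_model.msgpack, and git lfs pull (or huggingface_hub) fetches the real file on demand. A sketch of loading the Flax weights, assuming flax is installed and the repo is available locally (the path is a placeholder):

from transformers import FlaxGPT2LMHeadModel

# from_pretrained reads flax_model.msgpack once the LFS object is present;
# "path/to/checkpoint" is a placeholder for this repo or a local clone.
model = FlaxGPT2LMHeadModel.from_pretrained("path/to/checkpoint")
print(model.config.n_layer, model.config.n_embd)  # 36, 1280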
generation_config.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 50256,
+   "eos_token_id": 50256,
+   "transformers_version": "4.36.0"
+ }
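
This generation config only pins bos_token_id and eos_token_id to 50256 (<|endoftext|>); transformers loads it automatically alongside the model, so generation terminates at <|endoftext|> without extra arguments. A sketch under those assumptions (placeholder path, sample prompt; only Flax weights appear in this commit, hence from_flax=True):

from transformers import AutoModelForCausalLM, AutoTokenizer

path = "path/to/checkpoint"  # placeholder for this repo or a local clone
tokenizer = AutoTokenizer.from_pretrained(path)
model = AutoModelForCausalLM.from_pretrained(path, from_flax=True)

# eos_token_id=50256 is picked up from generation_config.json automatically.
inputs = tokenizer("The weather today is", return_tensors="pt")
out = model.generate(**inputs, max_new_tokens=30, do_sample=True)
print(tokenizer.decode(out[0], skip_special_tokens=True))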
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "bos_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
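
All three entries point at the same <|endoftext|> string, the usual GPT-2 convention of reusing one marker for bos, eos, and unk. A quick check against the uploaded tokenizer files (placeholder path):

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("path/to/checkpoint")  # placeholder path
# bos, eos and unk all resolve to the single <|endoftext|> marker
assert tok.bos_token == tok.eos_token == tok.unk_token == "<|endoftext|>"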
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,22 @@
+ {
+   "add_bos_token": false,
+   "add_prefix_space": false,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<|endoftext|>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<|endoftext|>",
+   "clean_up_tokenization_spaces": true,
+   "eos_token": "<|endoftext|>",
+   "errors": "replace",
+   "model_max_length": 1024,
+   "pad_token": null,
+   "tokenizer_class": "GPT2Tokenizer",
+   "unk_token": "<|endoftext|>"
+ }
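
This file selects GPT2Tokenizer and caps inputs at model_max_length=1024, matching n_positions in config.json. Note that pad_token is null, so batched inference needs a padding token to be set explicitly; reusing eos is a common workaround (an assumption here, not something the uploaded config prescribes):

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("path/to/checkpoint")  # placeholder path
tok.pad_token = tok.eos_token  # assumed workaround; pad_token is null in the config
batch = tok(["short", "a somewhat longer input"], padding=True, return_tensors="pt")
print(batch["input_ids"].shape)  # (2, longest sequence length in the batch)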
vocab.json ADDED
The diff for this file is too large to render. See raw diff
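
vocab.json (the token-to-id map) and merges.txt (the BPE merge rules) are the raw byte-level BPE data behind tokenizer.json. If needed, GPT2Tokenizer can be built from those two files directly; a sketch, with file names as uploaded:

from transformers import GPT2Tokenizer

# Build the tokenizer from the raw BPE files rather than tokenizer.json.
tok = GPT2Tokenizer(vocab_file="vocab.json", merges_file="merges.txt")
print(tok.encode("Hello world"))  # ids depend on this model's trained vocabulary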