diff --git a/.gitattributes b/.gitattributes
index a6344aac8c09253b3b630fb776ae94478aa0275b..52373fe24473b1aa44333d318f578ae6bf04b49b 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
diff --git a/README.md b/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/config.json b/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..894622d904b85c1b04efd4d63cc53007fe8ace2d
--- /dev/null
+++ b/config.json
@@ -0,0 +1,25 @@
+{
+  "apply_residual_connection_post_layernorm": false,
+  "architectures": [
+    "BloomForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "attention_softmax_in_fp32": true,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "hidden_dropout": 0.0,
+  "hidden_size": 14336,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "masked_softmax_fusion": true,
+  "model_type": "bloom",
+  "n_head": 112,
+  "n_layer": 70,
+  "pad_token_id": 3,
+  "pretraining_tp": 1,
+  "slow_but_exact": false,
+  "torch_dtype": "float16",
+  "transformers_version": "4.42.4",
+  "use_cache": true,
+  "vocab_size": 250880
+}
diff --git a/generation_config.json b/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..1d8f9e66eb23995a5221342ec6f1b2b9101577bb
--- /dev/null
+++ b/generation_config.json
@@ -0,0 +1,5 @@
+{
+  "temperature": null,
+  "top_p": null,
+  "transformers_version": "4.42.4"
+}
diff --git a/model-00001-of-00071.safetensors b/model-00001-of-00071.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5fcded9de1fabb54bca56b64ff8fa663d4bceeea
--- /dev/null
+++ b/model-00001-of-00071.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b2b51e1bc290274b7b6706fc8f79ee2ab745c5354e26a1a9197e5cfebf0906d
+size 7193231512
diff --git a/model-00002-of-00071.safetensors b/model-00002-of-00071.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..226900ba50b7631bd15e26faabddd7148bd67fc3
--- /dev/null
+++ b/model-00002-of-00071.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:89d2e65739e4e3d5c4394dba273cf4cbb04655420fdacc520ffb07ac03434495
+size 4932990808
diff --git a/model-00003-of-00071.safetensors b/model-00003-of-00071.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..09fed1fb538bfd442209fcac33fb5ec993fa371c
--- /dev/null
+++ b/model-00003-of-00071.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a3f0ce2671af13ae711760028671e3397707403a8b9e6103e289c8927d3468f3
+size 4932875688
diff --git a/model-00004-of-00071.safetensors b/model-00004-of-00071.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a5587912376ece409c78b84c960549f73bce63dc
--- /dev/null
+++ b/model-00004-of-00071.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3bfaaacf5070b46f2dd5e8767716e73ba0f2a29fb083833487b26976a0e70f92
+size 4932875688
diff --git a/model-00005-of-00071.safetensors b/model-00005-of-00071.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e2975e5e290a573d78f3cd2d49f321da9a4a6eeb
--- /dev/null
+++ b/model-00005-of-00071.safetensors
@@ -0,0 +1,3 @@
+version 
https://git-lfs.github.com/spec/v1 +oid sha256:a1e8c69347a1fa3884d146d68724cf8ec7cf5f5bfbfc5fe63fac60535b405c28 +size 4932875688 diff --git a/model-00006-of-00071.safetensors b/model-00006-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d0d921a7f7f4dda99a5f5579ad52d0eda1fb499b --- /dev/null +++ b/model-00006-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e43a99e0c267f33281e405c23921ad4592c717663f1c4d47d914815b684002ca +size 4932875688 diff --git a/model-00007-of-00071.safetensors b/model-00007-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..999942bddb174a461eceff2c4d7b829e2e56941a --- /dev/null +++ b/model-00007-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41e7691884545c2068eae4ccfc5fafb1a682c096811a2a0700a82837b0b1fac9 +size 4932875688 diff --git a/model-00008-of-00071.safetensors b/model-00008-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4ae2444c2e8d1b83a33f4210f5210246ed6fa94c --- /dev/null +++ b/model-00008-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8048be64924f2243cd19f515a67fc19ecea87be6aa896dd9e0a6163f77ea6b90 +size 4932875688 diff --git a/model-00009-of-00071.safetensors b/model-00009-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f64c8483b97be2ff18735c9e4e3ce2df65e4909e --- /dev/null +++ b/model-00009-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d789cefd876426dc4ff39071020a942a310316d005f34711466a4a057afe8fe +size 4932875688 diff --git a/model-00010-of-00071.safetensors b/model-00010-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..99690579bbdfbc735ff0621c2839fa8705fc23ce --- /dev/null +++ b/model-00010-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc0eb4bfe265248a8fc967707f2301b5a85c0dccecced1eb9eda19b0b44646c2 +size 4932875688 diff --git a/model-00011-of-00071.safetensors b/model-00011-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0560a704ac7abb20dcc8429580fda4ff39ea2799 --- /dev/null +++ b/model-00011-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07d7668570857eadb8600103f14d7e79726446b3e8d8bc210d27803ce08a366f +size 4932875672 diff --git a/model-00012-of-00071.safetensors b/model-00012-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f0f59b410c2badedd8c4e0b7cca5726353a6058f --- /dev/null +++ b/model-00012-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6fcc8cbaa0649faea7f68ac724140b8155b9752a075f63a67beada36b28a04a +size 4932875696 diff --git a/model-00013-of-00071.safetensors b/model-00013-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..17a41d285f1450e1576becf10e373980a125147e --- /dev/null +++ b/model-00013-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42288ef814e311fe6f2b6f8aa31d21e02a66e40d8b236606dab818e494a1b7a0 +size 4932875696 diff --git a/model-00014-of-00071.safetensors b/model-00014-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c8fa8fb60112c976c312c8717d3fc53d18dfd18b --- /dev/null +++ 
b/model-00014-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e1626cba5a0147914ab56f243453722b599518575625ac80079739990e3fb23 +size 4932875696 diff --git a/model-00015-of-00071.safetensors b/model-00015-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a8acbfceace097495d5706aff71556dbc5334dbd --- /dev/null +++ b/model-00015-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:821c132ce66f28ef6a566cc4d9860c748443f4dc1bd49a9b581e32de540a39f3 +size 4932875696 diff --git a/model-00016-of-00071.safetensors b/model-00016-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..327a5ca94bfe86e8305a1b3040838e6ec0fb9879 --- /dev/null +++ b/model-00016-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78e1d4e7ce4d01d34ff4d2b49913ef829b038591f4e169fa8f784a600b1ff65b +size 4932875696 diff --git a/model-00017-of-00071.safetensors b/model-00017-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a5ea7b8f6223e4e899f5af2492cf58edd1a6313a --- /dev/null +++ b/model-00017-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f67dca38a40f8f013f0e2e32a320858d6a83643af4356908de4c61fa72068f6 +size 4932875696 diff --git a/model-00018-of-00071.safetensors b/model-00018-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1625b7d73d6373ac8489d97f6f6a0d81939771de --- /dev/null +++ b/model-00018-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c51b9f05455d0656307d431eec39c13930747c2647312cf6d724f70a8e87218d +size 4932875696 diff --git a/model-00019-of-00071.safetensors b/model-00019-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9c299cffcfaebb6837f13c17042e0793c011a65e --- /dev/null +++ b/model-00019-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27959f6770cb26b305bf5e5492fa36239bf39480e0feb3f35ec2e3df1c24077a +size 4932875696 diff --git a/model-00020-of-00071.safetensors b/model-00020-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2f89f36384c7324c6d350ea61ab598b284afbe16 --- /dev/null +++ b/model-00020-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:190301735eeaea45449f21fa754d4711761ed15e270f3f955b6998c8904d7c3d +size 4932875696 diff --git a/model-00021-of-00071.safetensors b/model-00021-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e44f6383f2fc3b41cd2628100fbcb74b9e1ad005 --- /dev/null +++ b/model-00021-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2577b35f6f8c9993f66fe7c4e2a625e7ec0c1d2609d688c61a9345d69833d6ea +size 4932875696 diff --git a/model-00022-of-00071.safetensors b/model-00022-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ea674fd312888a53589efdb6374e43259e222830 --- /dev/null +++ b/model-00022-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e4bedf14447f0371ac894baee0e40b6b2a933800a7b938702b17874b1fafb9e +size 4932875696 diff --git a/model-00023-of-00071.safetensors b/model-00023-of-00071.safetensors new file mode 100644 index 
0000000000000000000000000000000000000000..c79624dca973158ba88a8ca925665300be1f8fa8 --- /dev/null +++ b/model-00023-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c677a1168f6fada6fec74f1e9a2d15f1be0956c10e0f8322e1a7fb8cfb3a4aa0 +size 4932875696 diff --git a/model-00024-of-00071.safetensors b/model-00024-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..16c955f84d021f494ebcc68fd1cd301efdfc5c52 --- /dev/null +++ b/model-00024-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de327795b9fc815a8f923ffba34f0b582a620103705d6d75e72f253828e0b08f +size 4932875696 diff --git a/model-00025-of-00071.safetensors b/model-00025-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..58605027ae210ef2aa7225cc10f3a7069b4ce344 --- /dev/null +++ b/model-00025-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98addc9b8969ea6732acd471cdec364f405ac3fb7430a571a4c031187974a785 +size 4932875696 diff --git a/model-00026-of-00071.safetensors b/model-00026-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..42309ee8083f453e220bf2a9d71399efe59e4b56 --- /dev/null +++ b/model-00026-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11d8f748373decb825efd1952280a654adb038a5185a7c88cfa1b73cf29fbaf6 +size 4932875696 diff --git a/model-00027-of-00071.safetensors b/model-00027-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cf260ba98adf8888bd356709d6b08c8bb20787f9 --- /dev/null +++ b/model-00027-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ed7eecdc5bee5ad7e25b8e085d7b8d79d2335dd6088401d939ca12fdaa19dbe +size 4932875696 diff --git a/model-00028-of-00071.safetensors b/model-00028-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4f001d7f33a9c9789825d50e5f7dd89f7675be05 --- /dev/null +++ b/model-00028-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fd23dd033cf889e70fe6b1053e055580b98e8407dc33c448820505d8abc8541 +size 4932875696 diff --git a/model-00029-of-00071.safetensors b/model-00029-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e558edf9db90dc750d60f14ff57dbe8f425d0af8 --- /dev/null +++ b/model-00029-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45169c70d71f2bc36cefbe79b59769b694429a9b5ef24a73f30865a0992a20aa +size 4932875696 diff --git a/model-00030-of-00071.safetensors b/model-00030-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6faec7eb802ea708548f06356893c90411eecff4 --- /dev/null +++ b/model-00030-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95714a0b56124a4fe136113c8ea05ea7dfcc28570d58957d0ddb6a988ef88650 +size 4932875696 diff --git a/model-00031-of-00071.safetensors b/model-00031-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..13fb497f46b1abb2d73c63a258b46585f5918103 --- /dev/null +++ b/model-00031-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eba90d6cdd439aa355dfab8ef702592c6e0b2f45a4506a0f6fb4684261593439 +size 4932875696 diff --git a/model-00032-of-00071.safetensors b/model-00032-of-00071.safetensors 
new file mode 100644 index 0000000000000000000000000000000000000000..bd3a11af6efce13650328475a49fd8970d295be3 --- /dev/null +++ b/model-00032-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da5cfa765cd889beaba7012ee21d33681526fec98df2a03ec4531a82c5c93a4f +size 4932875696 diff --git a/model-00033-of-00071.safetensors b/model-00033-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..15404fdb361171c84174912979a4808acaac9521 --- /dev/null +++ b/model-00033-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4991a23c0317bf779b9923b9059f99ff57106c940b5663f95ef0d849e0605d44 +size 4932875696 diff --git a/model-00034-of-00071.safetensors b/model-00034-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3b83f50f551c3fb7a45f02c5d675a266e85b5d19 --- /dev/null +++ b/model-00034-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62bc513f6206582ddcc054a38857ca397b62aa79992c4c4ae8e17fcfda4bc29c +size 4932875696 diff --git a/model-00035-of-00071.safetensors b/model-00035-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a2ea899101871cbff7d9c81e9a9c4b4b6a1e9ab7 --- /dev/null +++ b/model-00035-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a042a40fc94aa5be3469e90af7761948ed3e577022029f6eb9e492e92e4c61a4 +size 4932875696 diff --git a/model-00036-of-00071.safetensors b/model-00036-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..85211bfd9a0823204795398baae6e4e36891cd36 --- /dev/null +++ b/model-00036-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a52a90446777309fa02db96f48e5f7dfa6a81f1242a4e8bbd151f4cb0a889da7 +size 4932875696 diff --git a/model-00037-of-00071.safetensors b/model-00037-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a9f1236de771e202113390d3ae45cfc66753921d --- /dev/null +++ b/model-00037-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d7916db87e1ac041084510517f7539ea81c7aa51f2dff0c8ccb5ab4af6197f3 +size 4932875696 diff --git a/model-00038-of-00071.safetensors b/model-00038-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b7defe1bb792be78a7b0e4f121dda6ba679d7739 --- /dev/null +++ b/model-00038-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b94151315210a22454ad36c964ec71cf9343fc8a7f9110b4f03529070dbc665 +size 4932875696 diff --git a/model-00039-of-00071.safetensors b/model-00039-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9236525ec57fa3901ba056d2aa60cf11252c9f5e --- /dev/null +++ b/model-00039-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:936015c6c1a32bfc0ddbc442662267ac27c1fe5c44715389765f7bffcee78a62 +size 4932875696 diff --git a/model-00040-of-00071.safetensors b/model-00040-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e58a09e59fd849461e0e0b3a7ebc7992b99de7ed --- /dev/null +++ b/model-00040-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51dbf2e08d0b6d785a26e10d2974ab1ff20f73fa3cf622952dd09f5c6024cb19 +size 4932875696 diff --git a/model-00041-of-00071.safetensors 
b/model-00041-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..33e11bcfafbe7efddf379818d82e4a888d9d9131 --- /dev/null +++ b/model-00041-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02bc5044e305c89551a26a993ab7c25d9c5e3a3b3c8969c083aa5a6883eba84a +size 4932875696 diff --git a/model-00042-of-00071.safetensors b/model-00042-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4d81a34b62e88a39d8ce15f96c492a9b556d2b83 --- /dev/null +++ b/model-00042-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a75660aea77313b042f1f07a11aa37098e6f2fae7bc35d1bcc51a8da74ce8e01 +size 4932875696 diff --git a/model-00043-of-00071.safetensors b/model-00043-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f9fa571806e88d9563c83453f655ecba47afb0bd --- /dev/null +++ b/model-00043-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b51a2d972e32d174df66fb4d44e3af2f6b9a0f043b1c01c4e4d0f536f063454d +size 4932875696 diff --git a/model-00044-of-00071.safetensors b/model-00044-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9270fa6a4032d650d24720383d83e1cd1b86a79f --- /dev/null +++ b/model-00044-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e327630a483ba34c5ab58b76285849e627f096a791bb7b279861b573bc778ecc +size 4932875696 diff --git a/model-00045-of-00071.safetensors b/model-00045-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cd0314ce0b7f0550fd26e563dfac72d06d9e0e55 --- /dev/null +++ b/model-00045-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7a4d75e0c2a1299dab0c3abfbab84b2a4233e061876cd9c01ddb001fe149591 +size 4932875696 diff --git a/model-00046-of-00071.safetensors b/model-00046-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e34a6638e1430b6892fa58d371a233d445310268 --- /dev/null +++ b/model-00046-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a494a68d6bc4b062ff8c05304e80eee487b06abb071f36b61f32407908911c30 +size 4932875696 diff --git a/model-00047-of-00071.safetensors b/model-00047-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..db9171c08f54826a227ad05ad764bba5afe1bb5d --- /dev/null +++ b/model-00047-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:965a890f048c370b46e5026779aae91c39587d36b930c5207c4b236d36fe5045 +size 4932875696 diff --git a/model-00048-of-00071.safetensors b/model-00048-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d48d4d16c83244c1db52c2907efa51e08b97a0cd --- /dev/null +++ b/model-00048-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a64a232893d9261c449f2f4012e8d0ca40a2c67547fb42a1aba60de5c1b7af5b +size 4932875696 diff --git a/model-00049-of-00071.safetensors b/model-00049-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4195c4f081de8612f32fd56183a53e903744109a --- /dev/null +++ b/model-00049-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09a09cb142651c5738a7f27d296c09537b88d4e66f5db777584482fa332a61bc +size 4932875696 diff --git 
a/model-00050-of-00071.safetensors b/model-00050-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2935a9f11917234cb8a7f1a645f4216a6323f7a3 --- /dev/null +++ b/model-00050-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2e0256aca0e7f8a2190b5174d26ef9cc28af1021b49b2909f6f4bf4d5b1821a +size 4932875696 diff --git a/model-00051-of-00071.safetensors b/model-00051-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..996acd650875c3a29a4dfc44f5e1d0c14966092e --- /dev/null +++ b/model-00051-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4046cc8501560cdb4b26f69ab248f0a539618ce5b5614b63a4bdd397ff1d3752 +size 4932875696 diff --git a/model-00052-of-00071.safetensors b/model-00052-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b868bc69cfa6c63801e1f3cb67107512a662fa82 --- /dev/null +++ b/model-00052-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dea14d95942a12fa3a910b8b69ae1fdf1186024571cd0a8a3b67a6785799610 +size 4932875696 diff --git a/model-00053-of-00071.safetensors b/model-00053-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..06cd65896ff56165bcf857aa9dcd30d015e42e2c --- /dev/null +++ b/model-00053-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88f02288b069f201adc26e219e88ffde990cb5b3c0bbc13c5a1fe8a5bc879276 +size 4932875696 diff --git a/model-00054-of-00071.safetensors b/model-00054-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b6ff20475d3b5fbb55bceaf379bd3521432ecadf --- /dev/null +++ b/model-00054-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a65ff6218ac62f7c8fd32e8cccb681c5ff55c871af83b142f0756d8bcff1b7a +size 4932875696 diff --git a/model-00055-of-00071.safetensors b/model-00055-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9941d691624135899b61816c9fb57e058c71653a --- /dev/null +++ b/model-00055-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcdd94a33f224b65e9b870714f79145379fdf728512d92cfb820d12f4ae3208e +size 4932875696 diff --git a/model-00056-of-00071.safetensors b/model-00056-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7d141829f14522c1ac5934cd1c4f74a4c1cd5d8c --- /dev/null +++ b/model-00056-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49c7dcdfff673290a547e7e00fcc6d2712f8e938475003a5bd47413243177c61 +size 4932875696 diff --git a/model-00057-of-00071.safetensors b/model-00057-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7b4303dbf25b12ac22bd96ee828d071094d4af35 --- /dev/null +++ b/model-00057-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ac61a40f1b1a5078505c15694db776988366fde86010eeb061efdad5e0bc730 +size 4932875696 diff --git a/model-00058-of-00071.safetensors b/model-00058-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..47d2964f890125ad5c8a7aedb8017f403e2d4943 --- /dev/null +++ b/model-00058-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b443f5a1e0b06343b159502013b7033354620548f1f1d58021a6cc861f2ac796 
+size 4932875696 diff --git a/model-00059-of-00071.safetensors b/model-00059-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0d8b0be4d6a9a0f30e278a047d8794c26b208ccb --- /dev/null +++ b/model-00059-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fa9521b7acd3d644df0f27074b3df0a95b36a99bc23a317c94c351b9ec2cb3b +size 4932875696 diff --git a/model-00060-of-00071.safetensors b/model-00060-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..07dcc357e5673f250baf9a039eb38c1d3c5198c7 --- /dev/null +++ b/model-00060-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86396bf9202fb287771ee0b67a3242f340a73d2213471f591073a26e202e27f9 +size 4932875696 diff --git a/model-00061-of-00071.safetensors b/model-00061-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a385a8a8ad9439c19ff74aad4af278d9cff23d44 --- /dev/null +++ b/model-00061-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fc508a299088024db3298a6696d21f6d49410aa9877aad4c6200418ac1e0f4a +size 4932875696 diff --git a/model-00062-of-00071.safetensors b/model-00062-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c4b14c16ef0239a94c3dbb19f5a3e60e9a014015 --- /dev/null +++ b/model-00062-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb78b4be38f735bb87cef4f10ee3226fb54dfb268192a39f03ef1a9bdf3cd68a +size 4932875696 diff --git a/model-00063-of-00071.safetensors b/model-00063-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ccbe5e27891f69beb87d64cb1fa42b98914fcf29 --- /dev/null +++ b/model-00063-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3affeea4655e9d87a8f4ff6bb3ce3dfa51da5c3df48d2bf4b7829c7cbd4262c4 +size 4932875696 diff --git a/model-00064-of-00071.safetensors b/model-00064-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b75c1df54d4f7aeb0f70ad5c7728f950e4fbc080 --- /dev/null +++ b/model-00064-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54684ed59441dcb502427225dd0999e0357d3811358675c087bc279c1e47f4a7 +size 4932875696 diff --git a/model-00065-of-00071.safetensors b/model-00065-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..16db3ceab56068ec4154dcce17abf9a3208b7828 --- /dev/null +++ b/model-00065-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9e4b7d3018e6ed13a86ad48336f6abd09d58bf2a0f1cb30671d6e4e19c72652 +size 4932875696 diff --git a/model-00066-of-00071.safetensors b/model-00066-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bd47d6242455ce9225c42de4b9037e06a73bb2d9 --- /dev/null +++ b/model-00066-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bbe2c814387b49772bf597d4b9ab5c0b9553d7b716d19ef7a289a9bfc87d4dc +size 4932875696 diff --git a/model-00067-of-00071.safetensors b/model-00067-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..546be226b308b6a16c5099dfb56bbf7f7a5e379c --- /dev/null +++ b/model-00067-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:d7c5b9d9255a506a42c5c3e8d8f4d6ec6d2f34cab6b37aa7405b4d88f0ecceca +size 4932875696 diff --git a/model-00068-of-00071.safetensors b/model-00068-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0e2e5cfa55d4a807a3bd8d320d61fba082e7fc0f --- /dev/null +++ b/model-00068-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ade405fcb8eccf20a5a4eeefea75dbc25253e6c7f4c383f2e5bdace667f3fc4c +size 4932875696 diff --git a/model-00069-of-00071.safetensors b/model-00069-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..47d6b14d748828f9fc996bc124f2a8355c4e8c9a --- /dev/null +++ b/model-00069-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:889221479a0372deb83074401d90277db9f954bc91c98a7c314fc0cfeb7116c7 +size 4932875696 diff --git a/model-00070-of-00071.safetensors b/model-00070-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dcc0c6dd3e7353e5aee58961c9b1a0451ce50d59 --- /dev/null +++ b/model-00070-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:910d8ad7fbde03d825d640f2f031a4a4f4ef7159ab78989341b3150120096107 +size 4932875696 diff --git a/model-00071-of-00071.safetensors b/model-00071-of-00071.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b26f1151cee30a15979dec529a80316b8c8ca8fe --- /dev/null +++ b/model-00071-of-00071.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94e6c2b45c66e53cad9afc2b31fe3f9ae32f31c8050109b9e1609bd1b73141aa +size 4932875664 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..6bf13fcbfe7d69ccd1690b3665a4ad9ba48d2d36 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,852 @@ +{ + "metadata": { + "total_size": 352494542848 + }, + "weight_map": { + "transformer.h.0.input_layernorm.bias": "model-00002-of-00071.safetensors", + "transformer.h.0.input_layernorm.weight": "model-00002-of-00071.safetensors", + "transformer.h.0.mlp.dense_4h_to_h.bias": "model-00002-of-00071.safetensors", + "transformer.h.0.mlp.dense_4h_to_h.weight": "model-00002-of-00071.safetensors", + "transformer.h.0.mlp.dense_h_to_4h.bias": "model-00002-of-00071.safetensors", + "transformer.h.0.mlp.dense_h_to_4h.weight": "model-00002-of-00071.safetensors", + "transformer.h.0.post_attention_layernorm.bias": "model-00002-of-00071.safetensors", + "transformer.h.0.post_attention_layernorm.weight": "model-00002-of-00071.safetensors", + "transformer.h.0.self_attention.dense.bias": "model-00002-of-00071.safetensors", + "transformer.h.0.self_attention.dense.weight": "model-00002-of-00071.safetensors", + "transformer.h.0.self_attention.query_key_value.bias": "model-00002-of-00071.safetensors", + "transformer.h.0.self_attention.query_key_value.weight": "model-00002-of-00071.safetensors", + "transformer.h.1.input_layernorm.bias": "model-00002-of-00071.safetensors", + "transformer.h.1.input_layernorm.weight": "model-00002-of-00071.safetensors", + "transformer.h.1.mlp.dense_4h_to_h.bias": "model-00003-of-00071.safetensors", + "transformer.h.1.mlp.dense_4h_to_h.weight": "model-00003-of-00071.safetensors", + "transformer.h.1.mlp.dense_h_to_4h.bias": "model-00003-of-00071.safetensors", + "transformer.h.1.mlp.dense_h_to_4h.weight": "model-00003-of-00071.safetensors", + "transformer.h.1.post_attention_layernorm.bias": 
"model-00003-of-00071.safetensors", + "transformer.h.1.post_attention_layernorm.weight": "model-00003-of-00071.safetensors", + "transformer.h.1.self_attention.dense.bias": "model-00003-of-00071.safetensors", + "transformer.h.1.self_attention.dense.weight": "model-00003-of-00071.safetensors", + "transformer.h.1.self_attention.query_key_value.bias": "model-00003-of-00071.safetensors", + "transformer.h.1.self_attention.query_key_value.weight": "model-00003-of-00071.safetensors", + "transformer.h.10.input_layernorm.bias": "model-00011-of-00071.safetensors", + "transformer.h.10.input_layernorm.weight": "model-00011-of-00071.safetensors", + "transformer.h.10.mlp.dense_4h_to_h.bias": "model-00012-of-00071.safetensors", + "transformer.h.10.mlp.dense_4h_to_h.weight": "model-00012-of-00071.safetensors", + "transformer.h.10.mlp.dense_h_to_4h.bias": "model-00012-of-00071.safetensors", + "transformer.h.10.mlp.dense_h_to_4h.weight": "model-00012-of-00071.safetensors", + "transformer.h.10.post_attention_layernorm.bias": "model-00012-of-00071.safetensors", + "transformer.h.10.post_attention_layernorm.weight": "model-00012-of-00071.safetensors", + "transformer.h.10.self_attention.dense.bias": "model-00012-of-00071.safetensors", + "transformer.h.10.self_attention.dense.weight": "model-00012-of-00071.safetensors", + "transformer.h.10.self_attention.query_key_value.bias": "model-00012-of-00071.safetensors", + "transformer.h.10.self_attention.query_key_value.weight": "model-00012-of-00071.safetensors", + "transformer.h.11.input_layernorm.bias": "model-00012-of-00071.safetensors", + "transformer.h.11.input_layernorm.weight": "model-00012-of-00071.safetensors", + "transformer.h.11.mlp.dense_4h_to_h.bias": "model-00013-of-00071.safetensors", + "transformer.h.11.mlp.dense_4h_to_h.weight": "model-00013-of-00071.safetensors", + "transformer.h.11.mlp.dense_h_to_4h.bias": "model-00013-of-00071.safetensors", + "transformer.h.11.mlp.dense_h_to_4h.weight": "model-00013-of-00071.safetensors", + "transformer.h.11.post_attention_layernorm.bias": "model-00013-of-00071.safetensors", + "transformer.h.11.post_attention_layernorm.weight": "model-00013-of-00071.safetensors", + "transformer.h.11.self_attention.dense.bias": "model-00013-of-00071.safetensors", + "transformer.h.11.self_attention.dense.weight": "model-00013-of-00071.safetensors", + "transformer.h.11.self_attention.query_key_value.bias": "model-00013-of-00071.safetensors", + "transformer.h.11.self_attention.query_key_value.weight": "model-00013-of-00071.safetensors", + "transformer.h.12.input_layernorm.bias": "model-00013-of-00071.safetensors", + "transformer.h.12.input_layernorm.weight": "model-00013-of-00071.safetensors", + "transformer.h.12.mlp.dense_4h_to_h.bias": "model-00014-of-00071.safetensors", + "transformer.h.12.mlp.dense_4h_to_h.weight": "model-00014-of-00071.safetensors", + "transformer.h.12.mlp.dense_h_to_4h.bias": "model-00014-of-00071.safetensors", + "transformer.h.12.mlp.dense_h_to_4h.weight": "model-00014-of-00071.safetensors", + "transformer.h.12.post_attention_layernorm.bias": "model-00014-of-00071.safetensors", + "transformer.h.12.post_attention_layernorm.weight": "model-00014-of-00071.safetensors", + "transformer.h.12.self_attention.dense.bias": "model-00014-of-00071.safetensors", + "transformer.h.12.self_attention.dense.weight": "model-00014-of-00071.safetensors", + "transformer.h.12.self_attention.query_key_value.bias": "model-00014-of-00071.safetensors", + "transformer.h.12.self_attention.query_key_value.weight": 
"model-00014-of-00071.safetensors", + "transformer.h.13.input_layernorm.bias": "model-00014-of-00071.safetensors", + "transformer.h.13.input_layernorm.weight": "model-00014-of-00071.safetensors", + "transformer.h.13.mlp.dense_4h_to_h.bias": "model-00015-of-00071.safetensors", + "transformer.h.13.mlp.dense_4h_to_h.weight": "model-00015-of-00071.safetensors", + "transformer.h.13.mlp.dense_h_to_4h.bias": "model-00015-of-00071.safetensors", + "transformer.h.13.mlp.dense_h_to_4h.weight": "model-00015-of-00071.safetensors", + "transformer.h.13.post_attention_layernorm.bias": "model-00015-of-00071.safetensors", + "transformer.h.13.post_attention_layernorm.weight": "model-00015-of-00071.safetensors", + "transformer.h.13.self_attention.dense.bias": "model-00015-of-00071.safetensors", + "transformer.h.13.self_attention.dense.weight": "model-00015-of-00071.safetensors", + "transformer.h.13.self_attention.query_key_value.bias": "model-00015-of-00071.safetensors", + "transformer.h.13.self_attention.query_key_value.weight": "model-00015-of-00071.safetensors", + "transformer.h.14.input_layernorm.bias": "model-00015-of-00071.safetensors", + "transformer.h.14.input_layernorm.weight": "model-00015-of-00071.safetensors", + "transformer.h.14.mlp.dense_4h_to_h.bias": "model-00016-of-00071.safetensors", + "transformer.h.14.mlp.dense_4h_to_h.weight": "model-00016-of-00071.safetensors", + "transformer.h.14.mlp.dense_h_to_4h.bias": "model-00016-of-00071.safetensors", + "transformer.h.14.mlp.dense_h_to_4h.weight": "model-00016-of-00071.safetensors", + "transformer.h.14.post_attention_layernorm.bias": "model-00016-of-00071.safetensors", + "transformer.h.14.post_attention_layernorm.weight": "model-00016-of-00071.safetensors", + "transformer.h.14.self_attention.dense.bias": "model-00016-of-00071.safetensors", + "transformer.h.14.self_attention.dense.weight": "model-00016-of-00071.safetensors", + "transformer.h.14.self_attention.query_key_value.bias": "model-00016-of-00071.safetensors", + "transformer.h.14.self_attention.query_key_value.weight": "model-00016-of-00071.safetensors", + "transformer.h.15.input_layernorm.bias": "model-00016-of-00071.safetensors", + "transformer.h.15.input_layernorm.weight": "model-00016-of-00071.safetensors", + "transformer.h.15.mlp.dense_4h_to_h.bias": "model-00017-of-00071.safetensors", + "transformer.h.15.mlp.dense_4h_to_h.weight": "model-00017-of-00071.safetensors", + "transformer.h.15.mlp.dense_h_to_4h.bias": "model-00017-of-00071.safetensors", + "transformer.h.15.mlp.dense_h_to_4h.weight": "model-00017-of-00071.safetensors", + "transformer.h.15.post_attention_layernorm.bias": "model-00017-of-00071.safetensors", + "transformer.h.15.post_attention_layernorm.weight": "model-00017-of-00071.safetensors", + "transformer.h.15.self_attention.dense.bias": "model-00017-of-00071.safetensors", + "transformer.h.15.self_attention.dense.weight": "model-00017-of-00071.safetensors", + "transformer.h.15.self_attention.query_key_value.bias": "model-00017-of-00071.safetensors", + "transformer.h.15.self_attention.query_key_value.weight": "model-00017-of-00071.safetensors", + "transformer.h.16.input_layernorm.bias": "model-00017-of-00071.safetensors", + "transformer.h.16.input_layernorm.weight": "model-00017-of-00071.safetensors", + "transformer.h.16.mlp.dense_4h_to_h.bias": "model-00018-of-00071.safetensors", + "transformer.h.16.mlp.dense_4h_to_h.weight": "model-00018-of-00071.safetensors", + "transformer.h.16.mlp.dense_h_to_4h.bias": "model-00018-of-00071.safetensors", + 
"transformer.h.16.mlp.dense_h_to_4h.weight": "model-00018-of-00071.safetensors", + "transformer.h.16.post_attention_layernorm.bias": "model-00018-of-00071.safetensors", + "transformer.h.16.post_attention_layernorm.weight": "model-00018-of-00071.safetensors", + "transformer.h.16.self_attention.dense.bias": "model-00018-of-00071.safetensors", + "transformer.h.16.self_attention.dense.weight": "model-00018-of-00071.safetensors", + "transformer.h.16.self_attention.query_key_value.bias": "model-00018-of-00071.safetensors", + "transformer.h.16.self_attention.query_key_value.weight": "model-00018-of-00071.safetensors", + "transformer.h.17.input_layernorm.bias": "model-00018-of-00071.safetensors", + "transformer.h.17.input_layernorm.weight": "model-00018-of-00071.safetensors", + "transformer.h.17.mlp.dense_4h_to_h.bias": "model-00019-of-00071.safetensors", + "transformer.h.17.mlp.dense_4h_to_h.weight": "model-00019-of-00071.safetensors", + "transformer.h.17.mlp.dense_h_to_4h.bias": "model-00019-of-00071.safetensors", + "transformer.h.17.mlp.dense_h_to_4h.weight": "model-00019-of-00071.safetensors", + "transformer.h.17.post_attention_layernorm.bias": "model-00019-of-00071.safetensors", + "transformer.h.17.post_attention_layernorm.weight": "model-00019-of-00071.safetensors", + "transformer.h.17.self_attention.dense.bias": "model-00019-of-00071.safetensors", + "transformer.h.17.self_attention.dense.weight": "model-00019-of-00071.safetensors", + "transformer.h.17.self_attention.query_key_value.bias": "model-00019-of-00071.safetensors", + "transformer.h.17.self_attention.query_key_value.weight": "model-00019-of-00071.safetensors", + "transformer.h.18.input_layernorm.bias": "model-00019-of-00071.safetensors", + "transformer.h.18.input_layernorm.weight": "model-00019-of-00071.safetensors", + "transformer.h.18.mlp.dense_4h_to_h.bias": "model-00020-of-00071.safetensors", + "transformer.h.18.mlp.dense_4h_to_h.weight": "model-00020-of-00071.safetensors", + "transformer.h.18.mlp.dense_h_to_4h.bias": "model-00020-of-00071.safetensors", + "transformer.h.18.mlp.dense_h_to_4h.weight": "model-00020-of-00071.safetensors", + "transformer.h.18.post_attention_layernorm.bias": "model-00020-of-00071.safetensors", + "transformer.h.18.post_attention_layernorm.weight": "model-00020-of-00071.safetensors", + "transformer.h.18.self_attention.dense.bias": "model-00020-of-00071.safetensors", + "transformer.h.18.self_attention.dense.weight": "model-00020-of-00071.safetensors", + "transformer.h.18.self_attention.query_key_value.bias": "model-00020-of-00071.safetensors", + "transformer.h.18.self_attention.query_key_value.weight": "model-00020-of-00071.safetensors", + "transformer.h.19.input_layernorm.bias": "model-00020-of-00071.safetensors", + "transformer.h.19.input_layernorm.weight": "model-00020-of-00071.safetensors", + "transformer.h.19.mlp.dense_4h_to_h.bias": "model-00021-of-00071.safetensors", + "transformer.h.19.mlp.dense_4h_to_h.weight": "model-00021-of-00071.safetensors", + "transformer.h.19.mlp.dense_h_to_4h.bias": "model-00021-of-00071.safetensors", + "transformer.h.19.mlp.dense_h_to_4h.weight": "model-00021-of-00071.safetensors", + "transformer.h.19.post_attention_layernorm.bias": "model-00021-of-00071.safetensors", + "transformer.h.19.post_attention_layernorm.weight": "model-00021-of-00071.safetensors", + "transformer.h.19.self_attention.dense.bias": "model-00021-of-00071.safetensors", + "transformer.h.19.self_attention.dense.weight": "model-00021-of-00071.safetensors", + 
"transformer.h.19.self_attention.query_key_value.bias": "model-00021-of-00071.safetensors", + "transformer.h.19.self_attention.query_key_value.weight": "model-00021-of-00071.safetensors", + "transformer.h.2.input_layernorm.bias": "model-00003-of-00071.safetensors", + "transformer.h.2.input_layernorm.weight": "model-00003-of-00071.safetensors", + "transformer.h.2.mlp.dense_4h_to_h.bias": "model-00004-of-00071.safetensors", + "transformer.h.2.mlp.dense_4h_to_h.weight": "model-00004-of-00071.safetensors", + "transformer.h.2.mlp.dense_h_to_4h.bias": "model-00004-of-00071.safetensors", + "transformer.h.2.mlp.dense_h_to_4h.weight": "model-00004-of-00071.safetensors", + "transformer.h.2.post_attention_layernorm.bias": "model-00004-of-00071.safetensors", + "transformer.h.2.post_attention_layernorm.weight": "model-00004-of-00071.safetensors", + "transformer.h.2.self_attention.dense.bias": "model-00004-of-00071.safetensors", + "transformer.h.2.self_attention.dense.weight": "model-00004-of-00071.safetensors", + "transformer.h.2.self_attention.query_key_value.bias": "model-00004-of-00071.safetensors", + "transformer.h.2.self_attention.query_key_value.weight": "model-00004-of-00071.safetensors", + "transformer.h.20.input_layernorm.bias": "model-00021-of-00071.safetensors", + "transformer.h.20.input_layernorm.weight": "model-00021-of-00071.safetensors", + "transformer.h.20.mlp.dense_4h_to_h.bias": "model-00022-of-00071.safetensors", + "transformer.h.20.mlp.dense_4h_to_h.weight": "model-00022-of-00071.safetensors", + "transformer.h.20.mlp.dense_h_to_4h.bias": "model-00022-of-00071.safetensors", + "transformer.h.20.mlp.dense_h_to_4h.weight": "model-00022-of-00071.safetensors", + "transformer.h.20.post_attention_layernorm.bias": "model-00022-of-00071.safetensors", + "transformer.h.20.post_attention_layernorm.weight": "model-00022-of-00071.safetensors", + "transformer.h.20.self_attention.dense.bias": "model-00022-of-00071.safetensors", + "transformer.h.20.self_attention.dense.weight": "model-00022-of-00071.safetensors", + "transformer.h.20.self_attention.query_key_value.bias": "model-00022-of-00071.safetensors", + "transformer.h.20.self_attention.query_key_value.weight": "model-00022-of-00071.safetensors", + "transformer.h.21.input_layernorm.bias": "model-00022-of-00071.safetensors", + "transformer.h.21.input_layernorm.weight": "model-00022-of-00071.safetensors", + "transformer.h.21.mlp.dense_4h_to_h.bias": "model-00023-of-00071.safetensors", + "transformer.h.21.mlp.dense_4h_to_h.weight": "model-00023-of-00071.safetensors", + "transformer.h.21.mlp.dense_h_to_4h.bias": "model-00023-of-00071.safetensors", + "transformer.h.21.mlp.dense_h_to_4h.weight": "model-00023-of-00071.safetensors", + "transformer.h.21.post_attention_layernorm.bias": "model-00023-of-00071.safetensors", + "transformer.h.21.post_attention_layernorm.weight": "model-00023-of-00071.safetensors", + "transformer.h.21.self_attention.dense.bias": "model-00023-of-00071.safetensors", + "transformer.h.21.self_attention.dense.weight": "model-00023-of-00071.safetensors", + "transformer.h.21.self_attention.query_key_value.bias": "model-00023-of-00071.safetensors", + "transformer.h.21.self_attention.query_key_value.weight": "model-00023-of-00071.safetensors", + "transformer.h.22.input_layernorm.bias": "model-00023-of-00071.safetensors", + "transformer.h.22.input_layernorm.weight": "model-00023-of-00071.safetensors", + "transformer.h.22.mlp.dense_4h_to_h.bias": "model-00024-of-00071.safetensors", + "transformer.h.22.mlp.dense_4h_to_h.weight": 
"model-00024-of-00071.safetensors", + "transformer.h.22.mlp.dense_h_to_4h.bias": "model-00024-of-00071.safetensors", + "transformer.h.22.mlp.dense_h_to_4h.weight": "model-00024-of-00071.safetensors", + "transformer.h.22.post_attention_layernorm.bias": "model-00024-of-00071.safetensors", + "transformer.h.22.post_attention_layernorm.weight": "model-00024-of-00071.safetensors", + "transformer.h.22.self_attention.dense.bias": "model-00024-of-00071.safetensors", + "transformer.h.22.self_attention.dense.weight": "model-00024-of-00071.safetensors", + "transformer.h.22.self_attention.query_key_value.bias": "model-00024-of-00071.safetensors", + "transformer.h.22.self_attention.query_key_value.weight": "model-00024-of-00071.safetensors", + "transformer.h.23.input_layernorm.bias": "model-00024-of-00071.safetensors", + "transformer.h.23.input_layernorm.weight": "model-00024-of-00071.safetensors", + "transformer.h.23.mlp.dense_4h_to_h.bias": "model-00025-of-00071.safetensors", + "transformer.h.23.mlp.dense_4h_to_h.weight": "model-00025-of-00071.safetensors", + "transformer.h.23.mlp.dense_h_to_4h.bias": "model-00025-of-00071.safetensors", + "transformer.h.23.mlp.dense_h_to_4h.weight": "model-00025-of-00071.safetensors", + "transformer.h.23.post_attention_layernorm.bias": "model-00025-of-00071.safetensors", + "transformer.h.23.post_attention_layernorm.weight": "model-00025-of-00071.safetensors", + "transformer.h.23.self_attention.dense.bias": "model-00025-of-00071.safetensors", + "transformer.h.23.self_attention.dense.weight": "model-00025-of-00071.safetensors", + "transformer.h.23.self_attention.query_key_value.bias": "model-00025-of-00071.safetensors", + "transformer.h.23.self_attention.query_key_value.weight": "model-00025-of-00071.safetensors", + "transformer.h.24.input_layernorm.bias": "model-00025-of-00071.safetensors", + "transformer.h.24.input_layernorm.weight": "model-00025-of-00071.safetensors", + "transformer.h.24.mlp.dense_4h_to_h.bias": "model-00026-of-00071.safetensors", + "transformer.h.24.mlp.dense_4h_to_h.weight": "model-00026-of-00071.safetensors", + "transformer.h.24.mlp.dense_h_to_4h.bias": "model-00026-of-00071.safetensors", + "transformer.h.24.mlp.dense_h_to_4h.weight": "model-00026-of-00071.safetensors", + "transformer.h.24.post_attention_layernorm.bias": "model-00026-of-00071.safetensors", + "transformer.h.24.post_attention_layernorm.weight": "model-00026-of-00071.safetensors", + "transformer.h.24.self_attention.dense.bias": "model-00026-of-00071.safetensors", + "transformer.h.24.self_attention.dense.weight": "model-00026-of-00071.safetensors", + "transformer.h.24.self_attention.query_key_value.bias": "model-00026-of-00071.safetensors", + "transformer.h.24.self_attention.query_key_value.weight": "model-00026-of-00071.safetensors", + "transformer.h.25.input_layernorm.bias": "model-00026-of-00071.safetensors", + "transformer.h.25.input_layernorm.weight": "model-00026-of-00071.safetensors", + "transformer.h.25.mlp.dense_4h_to_h.bias": "model-00027-of-00071.safetensors", + "transformer.h.25.mlp.dense_4h_to_h.weight": "model-00027-of-00071.safetensors", + "transformer.h.25.mlp.dense_h_to_4h.bias": "model-00027-of-00071.safetensors", + "transformer.h.25.mlp.dense_h_to_4h.weight": "model-00027-of-00071.safetensors", + "transformer.h.25.post_attention_layernorm.bias": "model-00027-of-00071.safetensors", + "transformer.h.25.post_attention_layernorm.weight": "model-00027-of-00071.safetensors", + "transformer.h.25.self_attention.dense.bias": "model-00027-of-00071.safetensors", + 
"transformer.h.25.self_attention.dense.weight": "model-00027-of-00071.safetensors", + "transformer.h.25.self_attention.query_key_value.bias": "model-00027-of-00071.safetensors", + "transformer.h.25.self_attention.query_key_value.weight": "model-00027-of-00071.safetensors", + "transformer.h.26.input_layernorm.bias": "model-00027-of-00071.safetensors", + "transformer.h.26.input_layernorm.weight": "model-00027-of-00071.safetensors", + "transformer.h.26.mlp.dense_4h_to_h.bias": "model-00028-of-00071.safetensors", + "transformer.h.26.mlp.dense_4h_to_h.weight": "model-00028-of-00071.safetensors", + "transformer.h.26.mlp.dense_h_to_4h.bias": "model-00028-of-00071.safetensors", + "transformer.h.26.mlp.dense_h_to_4h.weight": "model-00028-of-00071.safetensors", + "transformer.h.26.post_attention_layernorm.bias": "model-00028-of-00071.safetensors", + "transformer.h.26.post_attention_layernorm.weight": "model-00028-of-00071.safetensors", + "transformer.h.26.self_attention.dense.bias": "model-00028-of-00071.safetensors", + "transformer.h.26.self_attention.dense.weight": "model-00028-of-00071.safetensors", + "transformer.h.26.self_attention.query_key_value.bias": "model-00028-of-00071.safetensors", + "transformer.h.26.self_attention.query_key_value.weight": "model-00028-of-00071.safetensors", + "transformer.h.27.input_layernorm.bias": "model-00028-of-00071.safetensors", + "transformer.h.27.input_layernorm.weight": "model-00028-of-00071.safetensors", + "transformer.h.27.mlp.dense_4h_to_h.bias": "model-00029-of-00071.safetensors", + "transformer.h.27.mlp.dense_4h_to_h.weight": "model-00029-of-00071.safetensors", + "transformer.h.27.mlp.dense_h_to_4h.bias": "model-00029-of-00071.safetensors", + "transformer.h.27.mlp.dense_h_to_4h.weight": "model-00029-of-00071.safetensors", + "transformer.h.27.post_attention_layernorm.bias": "model-00029-of-00071.safetensors", + "transformer.h.27.post_attention_layernorm.weight": "model-00029-of-00071.safetensors", + "transformer.h.27.self_attention.dense.bias": "model-00029-of-00071.safetensors", + "transformer.h.27.self_attention.dense.weight": "model-00029-of-00071.safetensors", + "transformer.h.27.self_attention.query_key_value.bias": "model-00029-of-00071.safetensors", + "transformer.h.27.self_attention.query_key_value.weight": "model-00029-of-00071.safetensors", + "transformer.h.28.input_layernorm.bias": "model-00029-of-00071.safetensors", + "transformer.h.28.input_layernorm.weight": "model-00029-of-00071.safetensors", + "transformer.h.28.mlp.dense_4h_to_h.bias": "model-00030-of-00071.safetensors", + "transformer.h.28.mlp.dense_4h_to_h.weight": "model-00030-of-00071.safetensors", + "transformer.h.28.mlp.dense_h_to_4h.bias": "model-00030-of-00071.safetensors", + "transformer.h.28.mlp.dense_h_to_4h.weight": "model-00030-of-00071.safetensors", + "transformer.h.28.post_attention_layernorm.bias": "model-00030-of-00071.safetensors", + "transformer.h.28.post_attention_layernorm.weight": "model-00030-of-00071.safetensors", + "transformer.h.28.self_attention.dense.bias": "model-00030-of-00071.safetensors", + "transformer.h.28.self_attention.dense.weight": "model-00030-of-00071.safetensors", + "transformer.h.28.self_attention.query_key_value.bias": "model-00030-of-00071.safetensors", + "transformer.h.28.self_attention.query_key_value.weight": "model-00030-of-00071.safetensors", + "transformer.h.29.input_layernorm.bias": "model-00030-of-00071.safetensors", + "transformer.h.29.input_layernorm.weight": "model-00030-of-00071.safetensors", + 
"transformer.h.29.mlp.dense_4h_to_h.bias": "model-00031-of-00071.safetensors", + "transformer.h.29.mlp.dense_4h_to_h.weight": "model-00031-of-00071.safetensors", + "transformer.h.29.mlp.dense_h_to_4h.bias": "model-00031-of-00071.safetensors", + "transformer.h.29.mlp.dense_h_to_4h.weight": "model-00031-of-00071.safetensors", + "transformer.h.29.post_attention_layernorm.bias": "model-00031-of-00071.safetensors", + "transformer.h.29.post_attention_layernorm.weight": "model-00031-of-00071.safetensors", + "transformer.h.29.self_attention.dense.bias": "model-00031-of-00071.safetensors", + "transformer.h.29.self_attention.dense.weight": "model-00031-of-00071.safetensors", + "transformer.h.29.self_attention.query_key_value.bias": "model-00031-of-00071.safetensors", + "transformer.h.29.self_attention.query_key_value.weight": "model-00031-of-00071.safetensors", + "transformer.h.3.input_layernorm.bias": "model-00004-of-00071.safetensors", + "transformer.h.3.input_layernorm.weight": "model-00004-of-00071.safetensors", + "transformer.h.3.mlp.dense_4h_to_h.bias": "model-00005-of-00071.safetensors", + "transformer.h.3.mlp.dense_4h_to_h.weight": "model-00005-of-00071.safetensors", + "transformer.h.3.mlp.dense_h_to_4h.bias": "model-00005-of-00071.safetensors", + "transformer.h.3.mlp.dense_h_to_4h.weight": "model-00005-of-00071.safetensors", + "transformer.h.3.post_attention_layernorm.bias": "model-00005-of-00071.safetensors", + "transformer.h.3.post_attention_layernorm.weight": "model-00005-of-00071.safetensors", + "transformer.h.3.self_attention.dense.bias": "model-00005-of-00071.safetensors", + "transformer.h.3.self_attention.dense.weight": "model-00005-of-00071.safetensors", + "transformer.h.3.self_attention.query_key_value.bias": "model-00005-of-00071.safetensors", + "transformer.h.3.self_attention.query_key_value.weight": "model-00005-of-00071.safetensors", + "transformer.h.30.input_layernorm.bias": "model-00031-of-00071.safetensors", + "transformer.h.30.input_layernorm.weight": "model-00031-of-00071.safetensors", + "transformer.h.30.mlp.dense_4h_to_h.bias": "model-00032-of-00071.safetensors", + "transformer.h.30.mlp.dense_4h_to_h.weight": "model-00032-of-00071.safetensors", + "transformer.h.30.mlp.dense_h_to_4h.bias": "model-00032-of-00071.safetensors", + "transformer.h.30.mlp.dense_h_to_4h.weight": "model-00032-of-00071.safetensors", + "transformer.h.30.post_attention_layernorm.bias": "model-00032-of-00071.safetensors", + "transformer.h.30.post_attention_layernorm.weight": "model-00032-of-00071.safetensors", + "transformer.h.30.self_attention.dense.bias": "model-00032-of-00071.safetensors", + "transformer.h.30.self_attention.dense.weight": "model-00032-of-00071.safetensors", + "transformer.h.30.self_attention.query_key_value.bias": "model-00032-of-00071.safetensors", + "transformer.h.30.self_attention.query_key_value.weight": "model-00032-of-00071.safetensors", + "transformer.h.31.input_layernorm.bias": "model-00032-of-00071.safetensors", + "transformer.h.31.input_layernorm.weight": "model-00032-of-00071.safetensors", + "transformer.h.31.mlp.dense_4h_to_h.bias": "model-00033-of-00071.safetensors", + "transformer.h.31.mlp.dense_4h_to_h.weight": "model-00033-of-00071.safetensors", + "transformer.h.31.mlp.dense_h_to_4h.bias": "model-00033-of-00071.safetensors", + "transformer.h.31.mlp.dense_h_to_4h.weight": "model-00033-of-00071.safetensors", + "transformer.h.31.post_attention_layernorm.bias": "model-00033-of-00071.safetensors", + "transformer.h.31.post_attention_layernorm.weight": 
"model-00033-of-00071.safetensors", + "transformer.h.31.self_attention.dense.bias": "model-00033-of-00071.safetensors", + "transformer.h.31.self_attention.dense.weight": "model-00033-of-00071.safetensors", + "transformer.h.31.self_attention.query_key_value.bias": "model-00033-of-00071.safetensors", + "transformer.h.31.self_attention.query_key_value.weight": "model-00033-of-00071.safetensors", + "transformer.h.32.input_layernorm.bias": "model-00033-of-00071.safetensors", + "transformer.h.32.input_layernorm.weight": "model-00033-of-00071.safetensors", + "transformer.h.32.mlp.dense_4h_to_h.bias": "model-00034-of-00071.safetensors", + "transformer.h.32.mlp.dense_4h_to_h.weight": "model-00034-of-00071.safetensors", + "transformer.h.32.mlp.dense_h_to_4h.bias": "model-00034-of-00071.safetensors", + "transformer.h.32.mlp.dense_h_to_4h.weight": "model-00034-of-00071.safetensors", + "transformer.h.32.post_attention_layernorm.bias": "model-00034-of-00071.safetensors", + "transformer.h.32.post_attention_layernorm.weight": "model-00034-of-00071.safetensors", + "transformer.h.32.self_attention.dense.bias": "model-00034-of-00071.safetensors", + "transformer.h.32.self_attention.dense.weight": "model-00034-of-00071.safetensors", + "transformer.h.32.self_attention.query_key_value.bias": "model-00034-of-00071.safetensors", + "transformer.h.32.self_attention.query_key_value.weight": "model-00034-of-00071.safetensors", + "transformer.h.33.input_layernorm.bias": "model-00034-of-00071.safetensors", + "transformer.h.33.input_layernorm.weight": "model-00034-of-00071.safetensors", + "transformer.h.33.mlp.dense_4h_to_h.bias": "model-00035-of-00071.safetensors", + "transformer.h.33.mlp.dense_4h_to_h.weight": "model-00035-of-00071.safetensors", + "transformer.h.33.mlp.dense_h_to_4h.bias": "model-00035-of-00071.safetensors", + "transformer.h.33.mlp.dense_h_to_4h.weight": "model-00035-of-00071.safetensors", + "transformer.h.33.post_attention_layernorm.bias": "model-00035-of-00071.safetensors", + "transformer.h.33.post_attention_layernorm.weight": "model-00035-of-00071.safetensors", + "transformer.h.33.self_attention.dense.bias": "model-00035-of-00071.safetensors", + "transformer.h.33.self_attention.dense.weight": "model-00035-of-00071.safetensors", + "transformer.h.33.self_attention.query_key_value.bias": "model-00035-of-00071.safetensors", + "transformer.h.33.self_attention.query_key_value.weight": "model-00035-of-00071.safetensors", + "transformer.h.34.input_layernorm.bias": "model-00035-of-00071.safetensors", + "transformer.h.34.input_layernorm.weight": "model-00035-of-00071.safetensors", + "transformer.h.34.mlp.dense_4h_to_h.bias": "model-00036-of-00071.safetensors", + "transformer.h.34.mlp.dense_4h_to_h.weight": "model-00036-of-00071.safetensors", + "transformer.h.34.mlp.dense_h_to_4h.bias": "model-00036-of-00071.safetensors", + "transformer.h.34.mlp.dense_h_to_4h.weight": "model-00036-of-00071.safetensors", + "transformer.h.34.post_attention_layernorm.bias": "model-00036-of-00071.safetensors", + "transformer.h.34.post_attention_layernorm.weight": "model-00036-of-00071.safetensors", + "transformer.h.34.self_attention.dense.bias": "model-00036-of-00071.safetensors", + "transformer.h.34.self_attention.dense.weight": "model-00036-of-00071.safetensors", + "transformer.h.34.self_attention.query_key_value.bias": "model-00036-of-00071.safetensors", + "transformer.h.34.self_attention.query_key_value.weight": "model-00036-of-00071.safetensors", + "transformer.h.35.input_layernorm.bias": "model-00036-of-00071.safetensors", + 
"transformer.h.35.input_layernorm.weight": "model-00036-of-00071.safetensors", + "transformer.h.35.mlp.dense_4h_to_h.bias": "model-00037-of-00071.safetensors", + "transformer.h.35.mlp.dense_4h_to_h.weight": "model-00037-of-00071.safetensors", + "transformer.h.35.mlp.dense_h_to_4h.bias": "model-00037-of-00071.safetensors", + "transformer.h.35.mlp.dense_h_to_4h.weight": "model-00037-of-00071.safetensors", + "transformer.h.35.post_attention_layernorm.bias": "model-00037-of-00071.safetensors", + "transformer.h.35.post_attention_layernorm.weight": "model-00037-of-00071.safetensors", + "transformer.h.35.self_attention.dense.bias": "model-00037-of-00071.safetensors", + "transformer.h.35.self_attention.dense.weight": "model-00037-of-00071.safetensors", + "transformer.h.35.self_attention.query_key_value.bias": "model-00037-of-00071.safetensors", + "transformer.h.35.self_attention.query_key_value.weight": "model-00037-of-00071.safetensors", + "transformer.h.36.input_layernorm.bias": "model-00037-of-00071.safetensors", + "transformer.h.36.input_layernorm.weight": "model-00037-of-00071.safetensors", + "transformer.h.36.mlp.dense_4h_to_h.bias": "model-00038-of-00071.safetensors", + "transformer.h.36.mlp.dense_4h_to_h.weight": "model-00038-of-00071.safetensors", + "transformer.h.36.mlp.dense_h_to_4h.bias": "model-00038-of-00071.safetensors", + "transformer.h.36.mlp.dense_h_to_4h.weight": "model-00038-of-00071.safetensors", + "transformer.h.36.post_attention_layernorm.bias": "model-00038-of-00071.safetensors", + "transformer.h.36.post_attention_layernorm.weight": "model-00038-of-00071.safetensors", + "transformer.h.36.self_attention.dense.bias": "model-00038-of-00071.safetensors", + "transformer.h.36.self_attention.dense.weight": "model-00038-of-00071.safetensors", + "transformer.h.36.self_attention.query_key_value.bias": "model-00038-of-00071.safetensors", + "transformer.h.36.self_attention.query_key_value.weight": "model-00038-of-00071.safetensors", + "transformer.h.37.input_layernorm.bias": "model-00038-of-00071.safetensors", + "transformer.h.37.input_layernorm.weight": "model-00038-of-00071.safetensors", + "transformer.h.37.mlp.dense_4h_to_h.bias": "model-00039-of-00071.safetensors", + "transformer.h.37.mlp.dense_4h_to_h.weight": "model-00039-of-00071.safetensors", + "transformer.h.37.mlp.dense_h_to_4h.bias": "model-00039-of-00071.safetensors", + "transformer.h.37.mlp.dense_h_to_4h.weight": "model-00039-of-00071.safetensors", + "transformer.h.37.post_attention_layernorm.bias": "model-00039-of-00071.safetensors", + "transformer.h.37.post_attention_layernorm.weight": "model-00039-of-00071.safetensors", + "transformer.h.37.self_attention.dense.bias": "model-00039-of-00071.safetensors", + "transformer.h.37.self_attention.dense.weight": "model-00039-of-00071.safetensors", + "transformer.h.37.self_attention.query_key_value.bias": "model-00039-of-00071.safetensors", + "transformer.h.37.self_attention.query_key_value.weight": "model-00039-of-00071.safetensors", + "transformer.h.38.input_layernorm.bias": "model-00039-of-00071.safetensors", + "transformer.h.38.input_layernorm.weight": "model-00039-of-00071.safetensors", + "transformer.h.38.mlp.dense_4h_to_h.bias": "model-00040-of-00071.safetensors", + "transformer.h.38.mlp.dense_4h_to_h.weight": "model-00040-of-00071.safetensors", + "transformer.h.38.mlp.dense_h_to_4h.bias": "model-00040-of-00071.safetensors", + "transformer.h.38.mlp.dense_h_to_4h.weight": "model-00040-of-00071.safetensors", + "transformer.h.38.post_attention_layernorm.bias": 
"model-00040-of-00071.safetensors", + "transformer.h.38.post_attention_layernorm.weight": "model-00040-of-00071.safetensors", + "transformer.h.38.self_attention.dense.bias": "model-00040-of-00071.safetensors", + "transformer.h.38.self_attention.dense.weight": "model-00040-of-00071.safetensors", + "transformer.h.38.self_attention.query_key_value.bias": "model-00040-of-00071.safetensors", + "transformer.h.38.self_attention.query_key_value.weight": "model-00040-of-00071.safetensors", + "transformer.h.39.input_layernorm.bias": "model-00040-of-00071.safetensors", + "transformer.h.39.input_layernorm.weight": "model-00040-of-00071.safetensors", + "transformer.h.39.mlp.dense_4h_to_h.bias": "model-00041-of-00071.safetensors", + "transformer.h.39.mlp.dense_4h_to_h.weight": "model-00041-of-00071.safetensors", + "transformer.h.39.mlp.dense_h_to_4h.bias": "model-00041-of-00071.safetensors", + "transformer.h.39.mlp.dense_h_to_4h.weight": "model-00041-of-00071.safetensors", + "transformer.h.39.post_attention_layernorm.bias": "model-00041-of-00071.safetensors", + "transformer.h.39.post_attention_layernorm.weight": "model-00041-of-00071.safetensors", + "transformer.h.39.self_attention.dense.bias": "model-00041-of-00071.safetensors", + "transformer.h.39.self_attention.dense.weight": "model-00041-of-00071.safetensors", + "transformer.h.39.self_attention.query_key_value.bias": "model-00041-of-00071.safetensors", + "transformer.h.39.self_attention.query_key_value.weight": "model-00041-of-00071.safetensors", + "transformer.h.4.input_layernorm.bias": "model-00005-of-00071.safetensors", + "transformer.h.4.input_layernorm.weight": "model-00005-of-00071.safetensors", + "transformer.h.4.mlp.dense_4h_to_h.bias": "model-00006-of-00071.safetensors", + "transformer.h.4.mlp.dense_4h_to_h.weight": "model-00006-of-00071.safetensors", + "transformer.h.4.mlp.dense_h_to_4h.bias": "model-00006-of-00071.safetensors", + "transformer.h.4.mlp.dense_h_to_4h.weight": "model-00006-of-00071.safetensors", + "transformer.h.4.post_attention_layernorm.bias": "model-00006-of-00071.safetensors", + "transformer.h.4.post_attention_layernorm.weight": "model-00006-of-00071.safetensors", + "transformer.h.4.self_attention.dense.bias": "model-00006-of-00071.safetensors", + "transformer.h.4.self_attention.dense.weight": "model-00006-of-00071.safetensors", + "transformer.h.4.self_attention.query_key_value.bias": "model-00006-of-00071.safetensors", + "transformer.h.4.self_attention.query_key_value.weight": "model-00006-of-00071.safetensors", + "transformer.h.40.input_layernorm.bias": "model-00041-of-00071.safetensors", + "transformer.h.40.input_layernorm.weight": "model-00041-of-00071.safetensors", + "transformer.h.40.mlp.dense_4h_to_h.bias": "model-00042-of-00071.safetensors", + "transformer.h.40.mlp.dense_4h_to_h.weight": "model-00042-of-00071.safetensors", + "transformer.h.40.mlp.dense_h_to_4h.bias": "model-00042-of-00071.safetensors", + "transformer.h.40.mlp.dense_h_to_4h.weight": "model-00042-of-00071.safetensors", + "transformer.h.40.post_attention_layernorm.bias": "model-00042-of-00071.safetensors", + "transformer.h.40.post_attention_layernorm.weight": "model-00042-of-00071.safetensors", + "transformer.h.40.self_attention.dense.bias": "model-00042-of-00071.safetensors", + "transformer.h.40.self_attention.dense.weight": "model-00042-of-00071.safetensors", + "transformer.h.40.self_attention.query_key_value.bias": "model-00042-of-00071.safetensors", + "transformer.h.40.self_attention.query_key_value.weight": "model-00042-of-00071.safetensors", + 
"transformer.h.41.input_layernorm.bias": "model-00042-of-00071.safetensors", + "transformer.h.41.input_layernorm.weight": "model-00042-of-00071.safetensors", + "transformer.h.41.mlp.dense_4h_to_h.bias": "model-00043-of-00071.safetensors", + "transformer.h.41.mlp.dense_4h_to_h.weight": "model-00043-of-00071.safetensors", + "transformer.h.41.mlp.dense_h_to_4h.bias": "model-00043-of-00071.safetensors", + "transformer.h.41.mlp.dense_h_to_4h.weight": "model-00043-of-00071.safetensors", + "transformer.h.41.post_attention_layernorm.bias": "model-00043-of-00071.safetensors", + "transformer.h.41.post_attention_layernorm.weight": "model-00043-of-00071.safetensors", + "transformer.h.41.self_attention.dense.bias": "model-00043-of-00071.safetensors", + "transformer.h.41.self_attention.dense.weight": "model-00043-of-00071.safetensors", + "transformer.h.41.self_attention.query_key_value.bias": "model-00043-of-00071.safetensors", + "transformer.h.41.self_attention.query_key_value.weight": "model-00043-of-00071.safetensors", + "transformer.h.42.input_layernorm.bias": "model-00043-of-00071.safetensors", + "transformer.h.42.input_layernorm.weight": "model-00043-of-00071.safetensors", + "transformer.h.42.mlp.dense_4h_to_h.bias": "model-00044-of-00071.safetensors", + "transformer.h.42.mlp.dense_4h_to_h.weight": "model-00044-of-00071.safetensors", + "transformer.h.42.mlp.dense_h_to_4h.bias": "model-00044-of-00071.safetensors", + "transformer.h.42.mlp.dense_h_to_4h.weight": "model-00044-of-00071.safetensors", + "transformer.h.42.post_attention_layernorm.bias": "model-00044-of-00071.safetensors", + "transformer.h.42.post_attention_layernorm.weight": "model-00044-of-00071.safetensors", + "transformer.h.42.self_attention.dense.bias": "model-00044-of-00071.safetensors", + "transformer.h.42.self_attention.dense.weight": "model-00044-of-00071.safetensors", + "transformer.h.42.self_attention.query_key_value.bias": "model-00044-of-00071.safetensors", + "transformer.h.42.self_attention.query_key_value.weight": "model-00044-of-00071.safetensors", + "transformer.h.43.input_layernorm.bias": "model-00044-of-00071.safetensors", + "transformer.h.43.input_layernorm.weight": "model-00044-of-00071.safetensors", + "transformer.h.43.mlp.dense_4h_to_h.bias": "model-00045-of-00071.safetensors", + "transformer.h.43.mlp.dense_4h_to_h.weight": "model-00045-of-00071.safetensors", + "transformer.h.43.mlp.dense_h_to_4h.bias": "model-00045-of-00071.safetensors", + "transformer.h.43.mlp.dense_h_to_4h.weight": "model-00045-of-00071.safetensors", + "transformer.h.43.post_attention_layernorm.bias": "model-00045-of-00071.safetensors", + "transformer.h.43.post_attention_layernorm.weight": "model-00045-of-00071.safetensors", + "transformer.h.43.self_attention.dense.bias": "model-00045-of-00071.safetensors", + "transformer.h.43.self_attention.dense.weight": "model-00045-of-00071.safetensors", + "transformer.h.43.self_attention.query_key_value.bias": "model-00045-of-00071.safetensors", + "transformer.h.43.self_attention.query_key_value.weight": "model-00045-of-00071.safetensors", + "transformer.h.44.input_layernorm.bias": "model-00045-of-00071.safetensors", + "transformer.h.44.input_layernorm.weight": "model-00045-of-00071.safetensors", + "transformer.h.44.mlp.dense_4h_to_h.bias": "model-00046-of-00071.safetensors", + "transformer.h.44.mlp.dense_4h_to_h.weight": "model-00046-of-00071.safetensors", + "transformer.h.44.mlp.dense_h_to_4h.bias": "model-00046-of-00071.safetensors", + "transformer.h.44.mlp.dense_h_to_4h.weight": 
"model-00046-of-00071.safetensors", + "transformer.h.44.post_attention_layernorm.bias": "model-00046-of-00071.safetensors", + "transformer.h.44.post_attention_layernorm.weight": "model-00046-of-00071.safetensors", + "transformer.h.44.self_attention.dense.bias": "model-00046-of-00071.safetensors", + "transformer.h.44.self_attention.dense.weight": "model-00046-of-00071.safetensors", + "transformer.h.44.self_attention.query_key_value.bias": "model-00046-of-00071.safetensors", + "transformer.h.44.self_attention.query_key_value.weight": "model-00046-of-00071.safetensors", + "transformer.h.45.input_layernorm.bias": "model-00046-of-00071.safetensors", + "transformer.h.45.input_layernorm.weight": "model-00046-of-00071.safetensors", + "transformer.h.45.mlp.dense_4h_to_h.bias": "model-00047-of-00071.safetensors", + "transformer.h.45.mlp.dense_4h_to_h.weight": "model-00047-of-00071.safetensors", + "transformer.h.45.mlp.dense_h_to_4h.bias": "model-00047-of-00071.safetensors", + "transformer.h.45.mlp.dense_h_to_4h.weight": "model-00047-of-00071.safetensors", + "transformer.h.45.post_attention_layernorm.bias": "model-00047-of-00071.safetensors", + "transformer.h.45.post_attention_layernorm.weight": "model-00047-of-00071.safetensors", + "transformer.h.45.self_attention.dense.bias": "model-00047-of-00071.safetensors", + "transformer.h.45.self_attention.dense.weight": "model-00047-of-00071.safetensors", + "transformer.h.45.self_attention.query_key_value.bias": "model-00047-of-00071.safetensors", + "transformer.h.45.self_attention.query_key_value.weight": "model-00047-of-00071.safetensors", + "transformer.h.46.input_layernorm.bias": "model-00047-of-00071.safetensors", + "transformer.h.46.input_layernorm.weight": "model-00047-of-00071.safetensors", + "transformer.h.46.mlp.dense_4h_to_h.bias": "model-00048-of-00071.safetensors", + "transformer.h.46.mlp.dense_4h_to_h.weight": "model-00048-of-00071.safetensors", + "transformer.h.46.mlp.dense_h_to_4h.bias": "model-00048-of-00071.safetensors", + "transformer.h.46.mlp.dense_h_to_4h.weight": "model-00048-of-00071.safetensors", + "transformer.h.46.post_attention_layernorm.bias": "model-00048-of-00071.safetensors", + "transformer.h.46.post_attention_layernorm.weight": "model-00048-of-00071.safetensors", + "transformer.h.46.self_attention.dense.bias": "model-00048-of-00071.safetensors", + "transformer.h.46.self_attention.dense.weight": "model-00048-of-00071.safetensors", + "transformer.h.46.self_attention.query_key_value.bias": "model-00048-of-00071.safetensors", + "transformer.h.46.self_attention.query_key_value.weight": "model-00048-of-00071.safetensors", + "transformer.h.47.input_layernorm.bias": "model-00048-of-00071.safetensors", + "transformer.h.47.input_layernorm.weight": "model-00048-of-00071.safetensors", + "transformer.h.47.mlp.dense_4h_to_h.bias": "model-00049-of-00071.safetensors", + "transformer.h.47.mlp.dense_4h_to_h.weight": "model-00049-of-00071.safetensors", + "transformer.h.47.mlp.dense_h_to_4h.bias": "model-00049-of-00071.safetensors", + "transformer.h.47.mlp.dense_h_to_4h.weight": "model-00049-of-00071.safetensors", + "transformer.h.47.post_attention_layernorm.bias": "model-00049-of-00071.safetensors", + "transformer.h.47.post_attention_layernorm.weight": "model-00049-of-00071.safetensors", + "transformer.h.47.self_attention.dense.bias": "model-00049-of-00071.safetensors", + "transformer.h.47.self_attention.dense.weight": "model-00049-of-00071.safetensors", + "transformer.h.47.self_attention.query_key_value.bias": 
"model-00049-of-00071.safetensors", + "transformer.h.47.self_attention.query_key_value.weight": "model-00049-of-00071.safetensors", + "transformer.h.48.input_layernorm.bias": "model-00049-of-00071.safetensors", + "transformer.h.48.input_layernorm.weight": "model-00049-of-00071.safetensors", + "transformer.h.48.mlp.dense_4h_to_h.bias": "model-00050-of-00071.safetensors", + "transformer.h.48.mlp.dense_4h_to_h.weight": "model-00050-of-00071.safetensors", + "transformer.h.48.mlp.dense_h_to_4h.bias": "model-00050-of-00071.safetensors", + "transformer.h.48.mlp.dense_h_to_4h.weight": "model-00050-of-00071.safetensors", + "transformer.h.48.post_attention_layernorm.bias": "model-00050-of-00071.safetensors", + "transformer.h.48.post_attention_layernorm.weight": "model-00050-of-00071.safetensors", + "transformer.h.48.self_attention.dense.bias": "model-00050-of-00071.safetensors", + "transformer.h.48.self_attention.dense.weight": "model-00050-of-00071.safetensors", + "transformer.h.48.self_attention.query_key_value.bias": "model-00050-of-00071.safetensors", + "transformer.h.48.self_attention.query_key_value.weight": "model-00050-of-00071.safetensors", + "transformer.h.49.input_layernorm.bias": "model-00050-of-00071.safetensors", + "transformer.h.49.input_layernorm.weight": "model-00050-of-00071.safetensors", + "transformer.h.49.mlp.dense_4h_to_h.bias": "model-00051-of-00071.safetensors", + "transformer.h.49.mlp.dense_4h_to_h.weight": "model-00051-of-00071.safetensors", + "transformer.h.49.mlp.dense_h_to_4h.bias": "model-00051-of-00071.safetensors", + "transformer.h.49.mlp.dense_h_to_4h.weight": "model-00051-of-00071.safetensors", + "transformer.h.49.post_attention_layernorm.bias": "model-00051-of-00071.safetensors", + "transformer.h.49.post_attention_layernorm.weight": "model-00051-of-00071.safetensors", + "transformer.h.49.self_attention.dense.bias": "model-00051-of-00071.safetensors", + "transformer.h.49.self_attention.dense.weight": "model-00051-of-00071.safetensors", + "transformer.h.49.self_attention.query_key_value.bias": "model-00051-of-00071.safetensors", + "transformer.h.49.self_attention.query_key_value.weight": "model-00051-of-00071.safetensors", + "transformer.h.5.input_layernorm.bias": "model-00006-of-00071.safetensors", + "transformer.h.5.input_layernorm.weight": "model-00006-of-00071.safetensors", + "transformer.h.5.mlp.dense_4h_to_h.bias": "model-00007-of-00071.safetensors", + "transformer.h.5.mlp.dense_4h_to_h.weight": "model-00007-of-00071.safetensors", + "transformer.h.5.mlp.dense_h_to_4h.bias": "model-00007-of-00071.safetensors", + "transformer.h.5.mlp.dense_h_to_4h.weight": "model-00007-of-00071.safetensors", + "transformer.h.5.post_attention_layernorm.bias": "model-00007-of-00071.safetensors", + "transformer.h.5.post_attention_layernorm.weight": "model-00007-of-00071.safetensors", + "transformer.h.5.self_attention.dense.bias": "model-00007-of-00071.safetensors", + "transformer.h.5.self_attention.dense.weight": "model-00007-of-00071.safetensors", + "transformer.h.5.self_attention.query_key_value.bias": "model-00007-of-00071.safetensors", + "transformer.h.5.self_attention.query_key_value.weight": "model-00007-of-00071.safetensors", + "transformer.h.50.input_layernorm.bias": "model-00051-of-00071.safetensors", + "transformer.h.50.input_layernorm.weight": "model-00051-of-00071.safetensors", + "transformer.h.50.mlp.dense_4h_to_h.bias": "model-00052-of-00071.safetensors", + "transformer.h.50.mlp.dense_4h_to_h.weight": "model-00052-of-00071.safetensors", + 
"transformer.h.50.mlp.dense_h_to_4h.bias": "model-00052-of-00071.safetensors", + "transformer.h.50.mlp.dense_h_to_4h.weight": "model-00052-of-00071.safetensors", + "transformer.h.50.post_attention_layernorm.bias": "model-00052-of-00071.safetensors", + "transformer.h.50.post_attention_layernorm.weight": "model-00052-of-00071.safetensors", + "transformer.h.50.self_attention.dense.bias": "model-00052-of-00071.safetensors", + "transformer.h.50.self_attention.dense.weight": "model-00052-of-00071.safetensors", + "transformer.h.50.self_attention.query_key_value.bias": "model-00052-of-00071.safetensors", + "transformer.h.50.self_attention.query_key_value.weight": "model-00052-of-00071.safetensors", + "transformer.h.51.input_layernorm.bias": "model-00052-of-00071.safetensors", + "transformer.h.51.input_layernorm.weight": "model-00052-of-00071.safetensors", + "transformer.h.51.mlp.dense_4h_to_h.bias": "model-00053-of-00071.safetensors", + "transformer.h.51.mlp.dense_4h_to_h.weight": "model-00053-of-00071.safetensors", + "transformer.h.51.mlp.dense_h_to_4h.bias": "model-00053-of-00071.safetensors", + "transformer.h.51.mlp.dense_h_to_4h.weight": "model-00053-of-00071.safetensors", + "transformer.h.51.post_attention_layernorm.bias": "model-00053-of-00071.safetensors", + "transformer.h.51.post_attention_layernorm.weight": "model-00053-of-00071.safetensors", + "transformer.h.51.self_attention.dense.bias": "model-00053-of-00071.safetensors", + "transformer.h.51.self_attention.dense.weight": "model-00053-of-00071.safetensors", + "transformer.h.51.self_attention.query_key_value.bias": "model-00053-of-00071.safetensors", + "transformer.h.51.self_attention.query_key_value.weight": "model-00053-of-00071.safetensors", + "transformer.h.52.input_layernorm.bias": "model-00053-of-00071.safetensors", + "transformer.h.52.input_layernorm.weight": "model-00053-of-00071.safetensors", + "transformer.h.52.mlp.dense_4h_to_h.bias": "model-00054-of-00071.safetensors", + "transformer.h.52.mlp.dense_4h_to_h.weight": "model-00054-of-00071.safetensors", + "transformer.h.52.mlp.dense_h_to_4h.bias": "model-00054-of-00071.safetensors", + "transformer.h.52.mlp.dense_h_to_4h.weight": "model-00054-of-00071.safetensors", + "transformer.h.52.post_attention_layernorm.bias": "model-00054-of-00071.safetensors", + "transformer.h.52.post_attention_layernorm.weight": "model-00054-of-00071.safetensors", + "transformer.h.52.self_attention.dense.bias": "model-00054-of-00071.safetensors", + "transformer.h.52.self_attention.dense.weight": "model-00054-of-00071.safetensors", + "transformer.h.52.self_attention.query_key_value.bias": "model-00054-of-00071.safetensors", + "transformer.h.52.self_attention.query_key_value.weight": "model-00054-of-00071.safetensors", + "transformer.h.53.input_layernorm.bias": "model-00054-of-00071.safetensors", + "transformer.h.53.input_layernorm.weight": "model-00054-of-00071.safetensors", + "transformer.h.53.mlp.dense_4h_to_h.bias": "model-00055-of-00071.safetensors", + "transformer.h.53.mlp.dense_4h_to_h.weight": "model-00055-of-00071.safetensors", + "transformer.h.53.mlp.dense_h_to_4h.bias": "model-00055-of-00071.safetensors", + "transformer.h.53.mlp.dense_h_to_4h.weight": "model-00055-of-00071.safetensors", + "transformer.h.53.post_attention_layernorm.bias": "model-00055-of-00071.safetensors", + "transformer.h.53.post_attention_layernorm.weight": "model-00055-of-00071.safetensors", + "transformer.h.53.self_attention.dense.bias": "model-00055-of-00071.safetensors", + "transformer.h.53.self_attention.dense.weight": 
"model-00055-of-00071.safetensors", + "transformer.h.53.self_attention.query_key_value.bias": "model-00055-of-00071.safetensors", + "transformer.h.53.self_attention.query_key_value.weight": "model-00055-of-00071.safetensors", + "transformer.h.54.input_layernorm.bias": "model-00055-of-00071.safetensors", + "transformer.h.54.input_layernorm.weight": "model-00055-of-00071.safetensors", + "transformer.h.54.mlp.dense_4h_to_h.bias": "model-00056-of-00071.safetensors", + "transformer.h.54.mlp.dense_4h_to_h.weight": "model-00056-of-00071.safetensors", + "transformer.h.54.mlp.dense_h_to_4h.bias": "model-00056-of-00071.safetensors", + "transformer.h.54.mlp.dense_h_to_4h.weight": "model-00056-of-00071.safetensors", + "transformer.h.54.post_attention_layernorm.bias": "model-00056-of-00071.safetensors", + "transformer.h.54.post_attention_layernorm.weight": "model-00056-of-00071.safetensors", + "transformer.h.54.self_attention.dense.bias": "model-00056-of-00071.safetensors", + "transformer.h.54.self_attention.dense.weight": "model-00056-of-00071.safetensors", + "transformer.h.54.self_attention.query_key_value.bias": "model-00056-of-00071.safetensors", + "transformer.h.54.self_attention.query_key_value.weight": "model-00056-of-00071.safetensors", + "transformer.h.55.input_layernorm.bias": "model-00056-of-00071.safetensors", + "transformer.h.55.input_layernorm.weight": "model-00056-of-00071.safetensors", + "transformer.h.55.mlp.dense_4h_to_h.bias": "model-00057-of-00071.safetensors", + "transformer.h.55.mlp.dense_4h_to_h.weight": "model-00057-of-00071.safetensors", + "transformer.h.55.mlp.dense_h_to_4h.bias": "model-00057-of-00071.safetensors", + "transformer.h.55.mlp.dense_h_to_4h.weight": "model-00057-of-00071.safetensors", + "transformer.h.55.post_attention_layernorm.bias": "model-00057-of-00071.safetensors", + "transformer.h.55.post_attention_layernorm.weight": "model-00057-of-00071.safetensors", + "transformer.h.55.self_attention.dense.bias": "model-00057-of-00071.safetensors", + "transformer.h.55.self_attention.dense.weight": "model-00057-of-00071.safetensors", + "transformer.h.55.self_attention.query_key_value.bias": "model-00057-of-00071.safetensors", + "transformer.h.55.self_attention.query_key_value.weight": "model-00057-of-00071.safetensors", + "transformer.h.56.input_layernorm.bias": "model-00057-of-00071.safetensors", + "transformer.h.56.input_layernorm.weight": "model-00057-of-00071.safetensors", + "transformer.h.56.mlp.dense_4h_to_h.bias": "model-00058-of-00071.safetensors", + "transformer.h.56.mlp.dense_4h_to_h.weight": "model-00058-of-00071.safetensors", + "transformer.h.56.mlp.dense_h_to_4h.bias": "model-00058-of-00071.safetensors", + "transformer.h.56.mlp.dense_h_to_4h.weight": "model-00058-of-00071.safetensors", + "transformer.h.56.post_attention_layernorm.bias": "model-00058-of-00071.safetensors", + "transformer.h.56.post_attention_layernorm.weight": "model-00058-of-00071.safetensors", + "transformer.h.56.self_attention.dense.bias": "model-00058-of-00071.safetensors", + "transformer.h.56.self_attention.dense.weight": "model-00058-of-00071.safetensors", + "transformer.h.56.self_attention.query_key_value.bias": "model-00058-of-00071.safetensors", + "transformer.h.56.self_attention.query_key_value.weight": "model-00058-of-00071.safetensors", + "transformer.h.57.input_layernorm.bias": "model-00058-of-00071.safetensors", + "transformer.h.57.input_layernorm.weight": "model-00058-of-00071.safetensors", + "transformer.h.57.mlp.dense_4h_to_h.bias": "model-00059-of-00071.safetensors", + 
"transformer.h.57.mlp.dense_4h_to_h.weight": "model-00059-of-00071.safetensors", + "transformer.h.57.mlp.dense_h_to_4h.bias": "model-00059-of-00071.safetensors", + "transformer.h.57.mlp.dense_h_to_4h.weight": "model-00059-of-00071.safetensors", + "transformer.h.57.post_attention_layernorm.bias": "model-00059-of-00071.safetensors", + "transformer.h.57.post_attention_layernorm.weight": "model-00059-of-00071.safetensors", + "transformer.h.57.self_attention.dense.bias": "model-00059-of-00071.safetensors", + "transformer.h.57.self_attention.dense.weight": "model-00059-of-00071.safetensors", + "transformer.h.57.self_attention.query_key_value.bias": "model-00059-of-00071.safetensors", + "transformer.h.57.self_attention.query_key_value.weight": "model-00059-of-00071.safetensors", + "transformer.h.58.input_layernorm.bias": "model-00059-of-00071.safetensors", + "transformer.h.58.input_layernorm.weight": "model-00059-of-00071.safetensors", + "transformer.h.58.mlp.dense_4h_to_h.bias": "model-00060-of-00071.safetensors", + "transformer.h.58.mlp.dense_4h_to_h.weight": "model-00060-of-00071.safetensors", + "transformer.h.58.mlp.dense_h_to_4h.bias": "model-00060-of-00071.safetensors", + "transformer.h.58.mlp.dense_h_to_4h.weight": "model-00060-of-00071.safetensors", + "transformer.h.58.post_attention_layernorm.bias": "model-00060-of-00071.safetensors", + "transformer.h.58.post_attention_layernorm.weight": "model-00060-of-00071.safetensors", + "transformer.h.58.self_attention.dense.bias": "model-00060-of-00071.safetensors", + "transformer.h.58.self_attention.dense.weight": "model-00060-of-00071.safetensors", + "transformer.h.58.self_attention.query_key_value.bias": "model-00060-of-00071.safetensors", + "transformer.h.58.self_attention.query_key_value.weight": "model-00060-of-00071.safetensors", + "transformer.h.59.input_layernorm.bias": "model-00060-of-00071.safetensors", + "transformer.h.59.input_layernorm.weight": "model-00060-of-00071.safetensors", + "transformer.h.59.mlp.dense_4h_to_h.bias": "model-00061-of-00071.safetensors", + "transformer.h.59.mlp.dense_4h_to_h.weight": "model-00061-of-00071.safetensors", + "transformer.h.59.mlp.dense_h_to_4h.bias": "model-00061-of-00071.safetensors", + "transformer.h.59.mlp.dense_h_to_4h.weight": "model-00061-of-00071.safetensors", + "transformer.h.59.post_attention_layernorm.bias": "model-00061-of-00071.safetensors", + "transformer.h.59.post_attention_layernorm.weight": "model-00061-of-00071.safetensors", + "transformer.h.59.self_attention.dense.bias": "model-00061-of-00071.safetensors", + "transformer.h.59.self_attention.dense.weight": "model-00061-of-00071.safetensors", + "transformer.h.59.self_attention.query_key_value.bias": "model-00061-of-00071.safetensors", + "transformer.h.59.self_attention.query_key_value.weight": "model-00061-of-00071.safetensors", + "transformer.h.6.input_layernorm.bias": "model-00007-of-00071.safetensors", + "transformer.h.6.input_layernorm.weight": "model-00007-of-00071.safetensors", + "transformer.h.6.mlp.dense_4h_to_h.bias": "model-00008-of-00071.safetensors", + "transformer.h.6.mlp.dense_4h_to_h.weight": "model-00008-of-00071.safetensors", + "transformer.h.6.mlp.dense_h_to_4h.bias": "model-00008-of-00071.safetensors", + "transformer.h.6.mlp.dense_h_to_4h.weight": "model-00008-of-00071.safetensors", + "transformer.h.6.post_attention_layernorm.bias": "model-00008-of-00071.safetensors", + "transformer.h.6.post_attention_layernorm.weight": "model-00008-of-00071.safetensors", + "transformer.h.6.self_attention.dense.bias": 
"model-00008-of-00071.safetensors", + "transformer.h.6.self_attention.dense.weight": "model-00008-of-00071.safetensors", + "transformer.h.6.self_attention.query_key_value.bias": "model-00008-of-00071.safetensors", + "transformer.h.6.self_attention.query_key_value.weight": "model-00008-of-00071.safetensors", + "transformer.h.60.input_layernorm.bias": "model-00061-of-00071.safetensors", + "transformer.h.60.input_layernorm.weight": "model-00061-of-00071.safetensors", + "transformer.h.60.mlp.dense_4h_to_h.bias": "model-00062-of-00071.safetensors", + "transformer.h.60.mlp.dense_4h_to_h.weight": "model-00062-of-00071.safetensors", + "transformer.h.60.mlp.dense_h_to_4h.bias": "model-00062-of-00071.safetensors", + "transformer.h.60.mlp.dense_h_to_4h.weight": "model-00062-of-00071.safetensors", + "transformer.h.60.post_attention_layernorm.bias": "model-00062-of-00071.safetensors", + "transformer.h.60.post_attention_layernorm.weight": "model-00062-of-00071.safetensors", + "transformer.h.60.self_attention.dense.bias": "model-00062-of-00071.safetensors", + "transformer.h.60.self_attention.dense.weight": "model-00062-of-00071.safetensors", + "transformer.h.60.self_attention.query_key_value.bias": "model-00062-of-00071.safetensors", + "transformer.h.60.self_attention.query_key_value.weight": "model-00062-of-00071.safetensors", + "transformer.h.61.input_layernorm.bias": "model-00062-of-00071.safetensors", + "transformer.h.61.input_layernorm.weight": "model-00062-of-00071.safetensors", + "transformer.h.61.mlp.dense_4h_to_h.bias": "model-00063-of-00071.safetensors", + "transformer.h.61.mlp.dense_4h_to_h.weight": "model-00063-of-00071.safetensors", + "transformer.h.61.mlp.dense_h_to_4h.bias": "model-00063-of-00071.safetensors", + "transformer.h.61.mlp.dense_h_to_4h.weight": "model-00063-of-00071.safetensors", + "transformer.h.61.post_attention_layernorm.bias": "model-00063-of-00071.safetensors", + "transformer.h.61.post_attention_layernorm.weight": "model-00063-of-00071.safetensors", + "transformer.h.61.self_attention.dense.bias": "model-00063-of-00071.safetensors", + "transformer.h.61.self_attention.dense.weight": "model-00063-of-00071.safetensors", + "transformer.h.61.self_attention.query_key_value.bias": "model-00063-of-00071.safetensors", + "transformer.h.61.self_attention.query_key_value.weight": "model-00063-of-00071.safetensors", + "transformer.h.62.input_layernorm.bias": "model-00063-of-00071.safetensors", + "transformer.h.62.input_layernorm.weight": "model-00063-of-00071.safetensors", + "transformer.h.62.mlp.dense_4h_to_h.bias": "model-00064-of-00071.safetensors", + "transformer.h.62.mlp.dense_4h_to_h.weight": "model-00064-of-00071.safetensors", + "transformer.h.62.mlp.dense_h_to_4h.bias": "model-00064-of-00071.safetensors", + "transformer.h.62.mlp.dense_h_to_4h.weight": "model-00064-of-00071.safetensors", + "transformer.h.62.post_attention_layernorm.bias": "model-00064-of-00071.safetensors", + "transformer.h.62.post_attention_layernorm.weight": "model-00064-of-00071.safetensors", + "transformer.h.62.self_attention.dense.bias": "model-00064-of-00071.safetensors", + "transformer.h.62.self_attention.dense.weight": "model-00064-of-00071.safetensors", + "transformer.h.62.self_attention.query_key_value.bias": "model-00064-of-00071.safetensors", + "transformer.h.62.self_attention.query_key_value.weight": "model-00064-of-00071.safetensors", + "transformer.h.63.input_layernorm.bias": "model-00064-of-00071.safetensors", + "transformer.h.63.input_layernorm.weight": "model-00064-of-00071.safetensors", + 
"transformer.h.63.mlp.dense_4h_to_h.bias": "model-00065-of-00071.safetensors", + "transformer.h.63.mlp.dense_4h_to_h.weight": "model-00065-of-00071.safetensors", + "transformer.h.63.mlp.dense_h_to_4h.bias": "model-00065-of-00071.safetensors", + "transformer.h.63.mlp.dense_h_to_4h.weight": "model-00065-of-00071.safetensors", + "transformer.h.63.post_attention_layernorm.bias": "model-00065-of-00071.safetensors", + "transformer.h.63.post_attention_layernorm.weight": "model-00065-of-00071.safetensors", + "transformer.h.63.self_attention.dense.bias": "model-00065-of-00071.safetensors", + "transformer.h.63.self_attention.dense.weight": "model-00065-of-00071.safetensors", + "transformer.h.63.self_attention.query_key_value.bias": "model-00065-of-00071.safetensors", + "transformer.h.63.self_attention.query_key_value.weight": "model-00065-of-00071.safetensors", + "transformer.h.64.input_layernorm.bias": "model-00065-of-00071.safetensors", + "transformer.h.64.input_layernorm.weight": "model-00065-of-00071.safetensors", + "transformer.h.64.mlp.dense_4h_to_h.bias": "model-00066-of-00071.safetensors", + "transformer.h.64.mlp.dense_4h_to_h.weight": "model-00066-of-00071.safetensors", + "transformer.h.64.mlp.dense_h_to_4h.bias": "model-00066-of-00071.safetensors", + "transformer.h.64.mlp.dense_h_to_4h.weight": "model-00066-of-00071.safetensors", + "transformer.h.64.post_attention_layernorm.bias": "model-00066-of-00071.safetensors", + "transformer.h.64.post_attention_layernorm.weight": "model-00066-of-00071.safetensors", + "transformer.h.64.self_attention.dense.bias": "model-00066-of-00071.safetensors", + "transformer.h.64.self_attention.dense.weight": "model-00066-of-00071.safetensors", + "transformer.h.64.self_attention.query_key_value.bias": "model-00066-of-00071.safetensors", + "transformer.h.64.self_attention.query_key_value.weight": "model-00066-of-00071.safetensors", + "transformer.h.65.input_layernorm.bias": "model-00066-of-00071.safetensors", + "transformer.h.65.input_layernorm.weight": "model-00066-of-00071.safetensors", + "transformer.h.65.mlp.dense_4h_to_h.bias": "model-00067-of-00071.safetensors", + "transformer.h.65.mlp.dense_4h_to_h.weight": "model-00067-of-00071.safetensors", + "transformer.h.65.mlp.dense_h_to_4h.bias": "model-00067-of-00071.safetensors", + "transformer.h.65.mlp.dense_h_to_4h.weight": "model-00067-of-00071.safetensors", + "transformer.h.65.post_attention_layernorm.bias": "model-00067-of-00071.safetensors", + "transformer.h.65.post_attention_layernorm.weight": "model-00067-of-00071.safetensors", + "transformer.h.65.self_attention.dense.bias": "model-00067-of-00071.safetensors", + "transformer.h.65.self_attention.dense.weight": "model-00067-of-00071.safetensors", + "transformer.h.65.self_attention.query_key_value.bias": "model-00067-of-00071.safetensors", + "transformer.h.65.self_attention.query_key_value.weight": "model-00067-of-00071.safetensors", + "transformer.h.66.input_layernorm.bias": "model-00067-of-00071.safetensors", + "transformer.h.66.input_layernorm.weight": "model-00067-of-00071.safetensors", + "transformer.h.66.mlp.dense_4h_to_h.bias": "model-00068-of-00071.safetensors", + "transformer.h.66.mlp.dense_4h_to_h.weight": "model-00068-of-00071.safetensors", + "transformer.h.66.mlp.dense_h_to_4h.bias": "model-00068-of-00071.safetensors", + "transformer.h.66.mlp.dense_h_to_4h.weight": "model-00068-of-00071.safetensors", + "transformer.h.66.post_attention_layernorm.bias": "model-00068-of-00071.safetensors", + "transformer.h.66.post_attention_layernorm.weight": 
"model-00068-of-00071.safetensors", + "transformer.h.66.self_attention.dense.bias": "model-00068-of-00071.safetensors", + "transformer.h.66.self_attention.dense.weight": "model-00068-of-00071.safetensors", + "transformer.h.66.self_attention.query_key_value.bias": "model-00068-of-00071.safetensors", + "transformer.h.66.self_attention.query_key_value.weight": "model-00068-of-00071.safetensors", + "transformer.h.67.input_layernorm.bias": "model-00068-of-00071.safetensors", + "transformer.h.67.input_layernorm.weight": "model-00068-of-00071.safetensors", + "transformer.h.67.mlp.dense_4h_to_h.bias": "model-00069-of-00071.safetensors", + "transformer.h.67.mlp.dense_4h_to_h.weight": "model-00069-of-00071.safetensors", + "transformer.h.67.mlp.dense_h_to_4h.bias": "model-00069-of-00071.safetensors", + "transformer.h.67.mlp.dense_h_to_4h.weight": "model-00069-of-00071.safetensors", + "transformer.h.67.post_attention_layernorm.bias": "model-00069-of-00071.safetensors", + "transformer.h.67.post_attention_layernorm.weight": "model-00069-of-00071.safetensors", + "transformer.h.67.self_attention.dense.bias": "model-00069-of-00071.safetensors", + "transformer.h.67.self_attention.dense.weight": "model-00069-of-00071.safetensors", + "transformer.h.67.self_attention.query_key_value.bias": "model-00069-of-00071.safetensors", + "transformer.h.67.self_attention.query_key_value.weight": "model-00069-of-00071.safetensors", + "transformer.h.68.input_layernorm.bias": "model-00069-of-00071.safetensors", + "transformer.h.68.input_layernorm.weight": "model-00069-of-00071.safetensors", + "transformer.h.68.mlp.dense_4h_to_h.bias": "model-00070-of-00071.safetensors", + "transformer.h.68.mlp.dense_4h_to_h.weight": "model-00070-of-00071.safetensors", + "transformer.h.68.mlp.dense_h_to_4h.bias": "model-00070-of-00071.safetensors", + "transformer.h.68.mlp.dense_h_to_4h.weight": "model-00070-of-00071.safetensors", + "transformer.h.68.post_attention_layernorm.bias": "model-00070-of-00071.safetensors", + "transformer.h.68.post_attention_layernorm.weight": "model-00070-of-00071.safetensors", + "transformer.h.68.self_attention.dense.bias": "model-00070-of-00071.safetensors", + "transformer.h.68.self_attention.dense.weight": "model-00070-of-00071.safetensors", + "transformer.h.68.self_attention.query_key_value.bias": "model-00070-of-00071.safetensors", + "transformer.h.68.self_attention.query_key_value.weight": "model-00070-of-00071.safetensors", + "transformer.h.69.input_layernorm.bias": "model-00070-of-00071.safetensors", + "transformer.h.69.input_layernorm.weight": "model-00070-of-00071.safetensors", + "transformer.h.69.mlp.dense_4h_to_h.bias": "model-00071-of-00071.safetensors", + "transformer.h.69.mlp.dense_4h_to_h.weight": "model-00071-of-00071.safetensors", + "transformer.h.69.mlp.dense_h_to_4h.bias": "model-00071-of-00071.safetensors", + "transformer.h.69.mlp.dense_h_to_4h.weight": "model-00071-of-00071.safetensors", + "transformer.h.69.post_attention_layernorm.bias": "model-00071-of-00071.safetensors", + "transformer.h.69.post_attention_layernorm.weight": "model-00071-of-00071.safetensors", + "transformer.h.69.self_attention.dense.bias": "model-00071-of-00071.safetensors", + "transformer.h.69.self_attention.dense.weight": "model-00071-of-00071.safetensors", + "transformer.h.69.self_attention.query_key_value.bias": "model-00071-of-00071.safetensors", + "transformer.h.69.self_attention.query_key_value.weight": "model-00071-of-00071.safetensors", + "transformer.h.7.input_layernorm.bias": "model-00008-of-00071.safetensors", + 
"transformer.h.7.input_layernorm.weight": "model-00008-of-00071.safetensors", + "transformer.h.7.mlp.dense_4h_to_h.bias": "model-00009-of-00071.safetensors", + "transformer.h.7.mlp.dense_4h_to_h.weight": "model-00009-of-00071.safetensors", + "transformer.h.7.mlp.dense_h_to_4h.bias": "model-00009-of-00071.safetensors", + "transformer.h.7.mlp.dense_h_to_4h.weight": "model-00009-of-00071.safetensors", + "transformer.h.7.post_attention_layernorm.bias": "model-00009-of-00071.safetensors", + "transformer.h.7.post_attention_layernorm.weight": "model-00009-of-00071.safetensors", + "transformer.h.7.self_attention.dense.bias": "model-00009-of-00071.safetensors", + "transformer.h.7.self_attention.dense.weight": "model-00009-of-00071.safetensors", + "transformer.h.7.self_attention.query_key_value.bias": "model-00009-of-00071.safetensors", + "transformer.h.7.self_attention.query_key_value.weight": "model-00009-of-00071.safetensors", + "transformer.h.8.input_layernorm.bias": "model-00009-of-00071.safetensors", + "transformer.h.8.input_layernorm.weight": "model-00009-of-00071.safetensors", + "transformer.h.8.mlp.dense_4h_to_h.bias": "model-00010-of-00071.safetensors", + "transformer.h.8.mlp.dense_4h_to_h.weight": "model-00010-of-00071.safetensors", + "transformer.h.8.mlp.dense_h_to_4h.bias": "model-00010-of-00071.safetensors", + "transformer.h.8.mlp.dense_h_to_4h.weight": "model-00010-of-00071.safetensors", + "transformer.h.8.post_attention_layernorm.bias": "model-00010-of-00071.safetensors", + "transformer.h.8.post_attention_layernorm.weight": "model-00010-of-00071.safetensors", + "transformer.h.8.self_attention.dense.bias": "model-00010-of-00071.safetensors", + "transformer.h.8.self_attention.dense.weight": "model-00010-of-00071.safetensors", + "transformer.h.8.self_attention.query_key_value.bias": "model-00010-of-00071.safetensors", + "transformer.h.8.self_attention.query_key_value.weight": "model-00010-of-00071.safetensors", + "transformer.h.9.input_layernorm.bias": "model-00010-of-00071.safetensors", + "transformer.h.9.input_layernorm.weight": "model-00010-of-00071.safetensors", + "transformer.h.9.mlp.dense_4h_to_h.bias": "model-00011-of-00071.safetensors", + "transformer.h.9.mlp.dense_4h_to_h.weight": "model-00011-of-00071.safetensors", + "transformer.h.9.mlp.dense_h_to_4h.bias": "model-00011-of-00071.safetensors", + "transformer.h.9.mlp.dense_h_to_4h.weight": "model-00011-of-00071.safetensors", + "transformer.h.9.post_attention_layernorm.bias": "model-00011-of-00071.safetensors", + "transformer.h.9.post_attention_layernorm.weight": "model-00011-of-00071.safetensors", + "transformer.h.9.self_attention.dense.bias": "model-00011-of-00071.safetensors", + "transformer.h.9.self_attention.dense.weight": "model-00011-of-00071.safetensors", + "transformer.h.9.self_attention.query_key_value.bias": "model-00011-of-00071.safetensors", + "transformer.h.9.self_attention.query_key_value.weight": "model-00011-of-00071.safetensors", + "transformer.ln_f.bias": "model-00071-of-00071.safetensors", + "transformer.ln_f.weight": "model-00071-of-00071.safetensors", + "transformer.word_embeddings.weight": "model-00001-of-00071.safetensors", + "transformer.word_embeddings_layernorm.bias": "model-00002-of-00071.safetensors", + "transformer.word_embeddings_layernorm.weight": "model-00002-of-00071.safetensors" + } +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..a782b2f1cdab4d0bacb2dc0f85d02c4b1e31f0bd --- /dev/null +++ 
b/special_tokens_map.json @@ -0,0 +1,30 @@ +{ + "bos_token": { + "content": "<s>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "</s>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<pad>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "<unk>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..700610a3634e99521c1da919a8813578450f404e --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15972849c91c78dfab27e2ef0eb53b17f95c4fe81be6664f7e85c6a7fffde0e +size 14500499 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8b9ca7f0ee5d840ffd0f9fc484d968d03889bdcf --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,45 @@ +{ + "add_prefix_space": false, + "added_tokens_decoder": { + "0": { + "content": "<unk>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<s>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "</s>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "<pad>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<s>", + "clean_up_tokenization_spaces": false, + "eos_token": "</s>", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<pad>", + "padding_side": "left", + "tokenizer_class": "BloomTokenizer", + "unk_token": "<unk>" +}
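
Note: the "weight_map" in model.safetensors.index.json added above is what lets transformers resolve each parameter name (for example transformer.h.42.mlp.dense_h_to_4h.weight) to the particular model-000XX-of-00071.safetensors shard that stores it, while tokenizer_config.json restores the BLOOM special tokens (<unk>=0, <s>=1, </s>=2, <pad>=3) with left padding. Below is a minimal, hedged sketch of loading this sharded checkpoint with the standard transformers API; the local path "./bloom-checkpoint" is a placeholder assumption for a clone of this repository, and device_map="auto" additionally assumes the accelerate package is installed.

# Minimal sketch (assumptions noted above): load the sharded safetensors
# checkpoint and the BLOOM tokenizer described by the files in this diff.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

checkpoint = "./bloom-checkpoint"  # placeholder: local clone of this repo

# Reads tokenizer_config.json / tokenizer.json and restores the special
# tokens (<unk>, <s>, </s>, <pad>) with padding_side="left".
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

# from_pretrained consults the "weight_map" in model.safetensors.index.json
# and pulls each tensor out of the matching model-000XX-of-00071.safetensors
# shard instead of materializing one monolithic file.
model = AutoModelForCausalLM.from_pretrained(
    checkpoint,
    torch_dtype=torch.float16,   # keep the checkpoint's half-precision weights
    device_map="auto",           # spread the 70 layers across available devices
    low_cpu_mem_usage=True,
)

inputs = tokenizer("Hello, world!", return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))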