diff --git a/checkpoint_metadata.json b/checkpoint_metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..40308f84295ac790648ff212cb135beaf81568f3 --- /dev/null +++ b/checkpoint_metadata.json @@ -0,0 +1,18 @@ +{ + "custom_metas": null, + "dp": 256, + "metas": { + "consumed_train_samples": 3927040000, + "data_stages": [ + { + "consumed_train_samples": 3927040000, + "name": "stable", + "start_training_step": 1 + } + ], + "last_stage_idx": 0, + "last_train_step": 3835000 + }, + "tp": 1, + "version": "1.4" +} \ No newline at end of file diff --git a/config.yaml b/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2b5e8450f023bca5f602cc566cf25f00daf77f9b --- /dev/null +++ b/config.yaml @@ -0,0 +1,154 @@ +checkpoints: + checkpoint_interval: 1000 + checkpoints_path: /scratch/loubna/checkpoints/smollm-big-run-1p81G-smollm-1.7B-8T-seed-0- + checkpoints_path_is_shared_file_system: false + overwrite_datastage: false + resume_checkpoint_path: /scratch/loubna/checkpoints/smollm-big-run-1p81G-smollm-1.7B-8T-seed-0-/3074000 + save_initial_state: true +data_stages: +- data: + dataset: + dataloader_type: cyclic + dataset_max_tokens: null + dataset_weights: + - 1 + datasets: + - filename_pattern: .*\.ds$ + folder: /fsx/loubna/tokenized_for_exps/phase2_mixture + seed: 0 + shuffle: true + skip_tokens: 0 + pad_samples_to_global_batch_size: false + skip_in_stream: true + num_loading_workers: 0 + seed: 0 + name: stable + start_training_step: 1 +experiment_logger: + tensorboard_logger: + push_to_hub_interval: 50 + repo_id: HuggingFaceTB/smollm-big-run + repo_public: false + tensorboard_dir: /scratch/loubna/tensorboard-cosmo-smollm-big-run + wandb_logger: + wandb_entity: loubnabnl + wandb_project: smollm-big-run +general: + benchmark_csv_path: null + consumed_train_samples: 3927040000 + ignore_sanity_checks: true + project: smollm-big-run + run: smollm-big-run-1p81G-smollm-1.7B-8T-seed-0- + seed: 42 + step: 3835000 +kill_switch_path: null +lighteval: + batch_size: 16 + checkpoints_path: null + generation: null + logging: + hub_repo_details: null + hub_repo_results: HuggingFaceTB/smollm-big-run + hub_repo_tensorboard: HuggingFaceTB/smollm-big-run + local_output_path: /scratch/loubna/lighteval/smollm-big-run-1p81G-smollm-1.7B-8T-seed-0- + push_details_to_hub: false + push_results_to_hub: true + push_results_to_tensorboard: true + tensorboard_metric_prefix: e + parallelism: + dp: 8 + expert_parallel_size: 1 + pp: 1 + pp_engine: 1f1b + tp: 1 + tp_linear_async_communication: false + tp_mode: ALL_REDUCE + slurm_script_dir: /fsx/loubna/logs/smollmv2/eval-scripts + slurm_template: /fsx/loubna/projects/brrr/examples/loubna/eval_1b.slurm.jinja + tasks: + custom_tasks: brrr.lighteval.evaluation_tasks + dataset_loading_processes: 8 + max_samples: 1000 + multichoice_continuations_start_space: null + no_multichoice_continuations_start_space: null + num_fewshot_seeds: null + tasks: early-signal + wandb: + wandb_entity: loubnabnl + wandb_project: smollm-big-run + wandb_run_name: smollm-big-run-1p81G-smollm-1.7B-8T-seed-0-_evals +logging: + iteration_step_info_interval: 1 + log_level: info + log_level_replica: info +model: + ddp_bucket_cap_mb: 25 + dtype: bfloat16 + init_method: + std: 0.02 + make_vocab_size_divisible_by: 1 + model_config: + bos_token_id: 0 + eos_token_id: 0 + hidden_act: silu + hidden_size: 2048 + initializer_range: 0.02 + intermediate_size: 8192 + is_llama_config: true + max_position_embeddings: 2048 + num_attention_heads: 32 + num_hidden_layers: 24 + num_key_value_heads: 32 + pad_token_id: null + pretraining_tp: 1 + rms_norm_eps: 1.0e-05 + rope_scaling: null + tie_word_embeddings: true + use_cache: true + vocab_size: 49152 +optimizer: + accumulate_grad_in_fp32: true + clip_grad: 1.0 + learning_rate_scheduler: + learning_rate: 0.0005 + lr_decay_starting_step: 3870000 + lr_decay_steps: 430000 + lr_decay_style: linear + lr_warmup_steps: 2000 + lr_warmup_style: linear + min_decay_lr: 0 + optimizer_factory: + adam_beta1: 0.9 + adam_beta2: 0.95 + adam_eps: 1.0e-08 + name: adamW + torch_adam_is_fused: true + weight_decay: 0.01 + zero_stage: 0 +parallelism: + dp: 256 + expert_parallel_size: 1 + pp: 1 + pp_engine: 1f1b + tp: 1 + tp_linear_async_communication: true + tp_mode: REDUCE_SCATTER +profiler: null +s3_upload: + remove_after_upload: true + s5cmd_concurrency: 5 + s5cmd_numworkers: 16 + s5cmd_path: /admin/home/loubna/miniconda3/envs/nanotron/bin/s5cmd + upload_s3_path: s3://synthetic-project-models/big-run-5T/smollm-big-run-1p81G-smollm-1.7B-8T-seed-0- +tokenizer: + tokenizer_max_length: null + tokenizer_name_or_path: HuggingFaceTB/cosmo2-tokenizer + tokenizer_revision: null +tokens: + batch_accumulation_per_replica: 1 + limit_test_batches: 0 + limit_val_batches: 0 + micro_batch_size: 4 + sequence_length: 2048 + train_steps: 4300000 + val_check_interval: 100 diff --git a/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8c3cb613141fe5acb1256ea9fcc5647ebf3240c2 --- /dev/null +++ b/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0df8c6ef0e6b6765d34a2f7925b5d554f60178786f7b11574253a946d94b5242 +size 8388848 diff --git a/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2b487611cd8d2e571edccd0b4daa83b80263d30b --- /dev/null +++ b/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab8796f38fe9bb2dac138f34c692ac9f6117b23c8ce418a8a728455571d31673 +size 25166176 diff --git a/model/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors b/model/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d4e7496c1d8f4ad12c066dea00009e11aa44aad9 --- /dev/null +++ b/model/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5c895b868d611297edfd5c1f4aab87760eeae49fc26e006a60676f656f4bbc7 +size 4192 diff --git a/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d87eafaf3dba780b6b1f08674899859612923005 --- /dev/null +++ b/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a0dbcebf05b34144d645792fb3fe6eff91c86805e0c053b8ba542042c2e7e15 +size 33554672 diff --git a/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f0ec8f20a52c812abeb77ba8bf3f39ffb7a10cd9 --- /dev/null +++ b/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40b3b8ce534d8e2ed1f47119016fe312b954a541657dcd20c0b048c71ec47a18 +size 67109160 diff --git a/model/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors b/model/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f76be3a636ef5b00aeac20a50dff923c3c73ef16 --- /dev/null +++ b/model/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:006e5c4380b961927f1ba842100fcb04c6ed926ea90698dbef2ad0b1cc32469a +size 4192 diff --git a/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..250ccede98cbd6c4f7e58418910996b6c327e8e2 --- /dev/null +++ b/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48080c61c6dbfaea481c8ed1476ad32c550355e0aacdc0b5ba15b333ab205d60 +size 8388848 diff --git a/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..daeb8a4948a2c687a7d2e0153a0291ab304d8cb9 --- /dev/null +++ b/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dc24e3f9f7a37b47cf198dea098d353c2963a76be30af053edd3008625559b3 +size 25166176 diff --git a/model/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors b/model/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..923b87b529aff45e0fa1c9f8c0d9cdf33acb2a9f --- /dev/null +++ b/model/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:603f8bdecd8c1e58e6d07e2934fd4e39e59d16199334d82683360384768efba7 +size 4192 diff --git a/model/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..355102c042980b78bab3c78c0dd4dfe9fad7dcb6 --- /dev/null +++ b/model/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b16a02681152efc5ecf60441a2946fcb819b37bc8403c76ba182dc3d457091e +size 33554672 diff --git a/model/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b92b9bfd42ce3e035487e7d9006fe813e11321f0 --- /dev/null +++ b/model/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b8764a664692dd1185234e1e12e670d714f53a60fd50b33e26bc2a56502c51a +size 67109160 diff --git a/model/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors b/model/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a4d0a90ef7e8831e70e40eaf1730b53198ca9510 --- /dev/null +++ b/model/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0376bdcb798935572b53f6f5e8eb79aff5c0cd830ea9a2fd3e01f565771720af +size 4192 diff --git a/model/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..131b6f42e3d72a9ba661ce1ac97b0c075f9b6f76 --- /dev/null +++ b/model/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c597cd4d5049325257d323a5e4f84794428fbf501df2634ad16f9bec4efac234 +size 8388848 diff --git a/model/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..90abb5ff127f021619141e72c2f3e0701aaf7380 --- /dev/null +++ b/model/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ded8bfa83bf89d082fd04e02d3d8377557c2f1e09a4a2ad1993feedea9cbeec5 +size 25166176 diff --git a/model/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors b/model/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..661aaf69f10ca62278fa9011ae57cf84fed873b4 --- /dev/null +++ b/model/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2170b9eb9597297aed1a2fc619a7098d1c6517c39ff4d846de75a5afb1fa75dd +size 4192 diff --git a/model/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..36d1eff0a057451520ba6ec6fb99520eccd1d3e6 --- /dev/null +++ b/model/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:547e7ef5c94e2ef8e89c8434fe745d43377c14a7c4c87248c04c7645043137ef +size 33554672 diff --git a/model/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..00dcb67aab89cf118545f163cea65314870ea87f --- /dev/null +++ b/model/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30333788f612dee918339620b44ff34171fc619b11211288f7d5439c2f57baca +size 67109160 diff --git a/model/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors b/model/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2a3a6bab47d464b2bb5b72ddd518ceb41acc5fbd --- /dev/null +++ b/model/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c63998b611b39bfe475a95234e79eea533d421e495fe6a761a7a483a5f74e3b +size 4192 diff --git a/model/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..020cde45746734194b31593516e4a417a3a0beaf --- /dev/null +++ b/model/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b58c88d7bfd335ec6bc9c49920e16666837c708a9dd2e16a842b5474009a184b +size 8388848 diff --git a/model/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d21f0df38e7701d0e5785ac120e2c2089a90018c --- /dev/null +++ b/model/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7d67183df2c73626aa940a8950abe90feb528d565c4ea2ce65939b2cf7b9878 +size 25166176 diff --git a/model/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors b/model/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2935edc1268926b5830ffd93bf8e190167e8d61c --- /dev/null +++ b/model/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23e4ac4c4b859f51a4b63c0b5dcf31efa306eb62d9bc78ada5ef5eb39af7009f +size 4192 diff --git a/model/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..39a407cfc0119b205d821aee83fc40e01016ae80 --- /dev/null +++ b/model/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59edfedcf83b562b0b61b9cb74a5b45000af930b216142a4a7d0e6ce32351b56 +size 33554672 diff --git a/model/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..da450b0759ba8604326b0e392695347abdd126cb --- /dev/null +++ b/model/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15bdf4dcabc0f76463183aaf7bc16ca50fa59121ef21b584403dcbc907ed25d7 +size 67109160 diff --git a/model/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors b/model/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..20cb3da34d29abfc663283bc742b3c8f21010d21 --- /dev/null +++ b/model/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:110a693fea2c5dcd62eedf2474d7fc4b523ed8609093786b8b4f9635c84cc394 +size 4192 diff --git a/model/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..36572887d18a358c0e4ba338a708d00eeda27c08 --- /dev/null +++ b/model/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b032ce45fbbf89acd43ecd6589ed7265631c69d9034f98e762adaaa89ca2b5e2 +size 8388848 diff --git a/model/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b21499cac6aa2b22a345ecdcbe1e0091f8dfa233 --- /dev/null +++ b/model/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3574082e2108aa2cdfa206b7ec02a70ccc8bcac916fe3972e301213f057ac02 +size 25166176 diff --git a/model/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors b/model/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..469b3ce37f9bb66a8a69866b2688be772238e9b5 --- /dev/null +++ b/model/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6937f759ae5fe67413e111b13f5f3723946101e5b9ace33b00954d7afa39218d +size 4192 diff --git a/model/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..49e67149523799d35679a78a812f39530e8310b7 --- /dev/null +++ b/model/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34b43f13e06541f78b1f8a27773e6726a1dc20365bd1e0a80e4cbaba4fdc5166 +size 33554672 diff --git a/model/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..de2573d13b637364a078b6352b41291b1054db6a --- /dev/null +++ b/model/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a5c2e80c933c79bec2ff8ed1fa1b70baaf59c1851ced3dd40058861cbd5f450 +size 67109160 diff --git a/model/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors b/model/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2fd30f429c8929bf0bc86a26e75a8e0e551ac467 --- /dev/null +++ b/model/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6f0adb6483cdefa9f4db36c4ecfcf695f2536db798e142140e6f885d00732b3 +size 4192 diff --git a/model/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..aa55e7d8bbfe5d078a576b124cb5f3d8b6e2588b --- /dev/null +++ b/model/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d321e7be1aeac9ae9af4f4b98336ed65da4caf671e48fe07847feb3db1dab7c +size 8388848 diff --git a/model/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ae3375e7b7b695c56bd00a94d341654e5ff2e69c --- /dev/null +++ b/model/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb572f72db675305f01bfd35f623e263c910488e3ea494bdb41b8abc75ffdd91 +size 25166176 diff --git a/model/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors b/model/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1796b151aa34e26ee308c147e5d65bc84232458f --- /dev/null +++ b/model/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:882532a0c2bccaec8ffe09b385aa18d0fecac5839b5a97d705ab726252f0f152 +size 4192 diff --git a/model/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f54d08e67f5e52d4c3d2b69cf37a87546e62ded4 --- /dev/null +++ b/model/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:777f15d9ffaeae9258d03218ed6cced46b2fe1e628adff3f02f7cc90f09d8945 +size 33554672 diff --git a/model/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors b/model/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..307f6f8128dc342c6741905899ea5781315fbfc9 --- /dev/null +++ b/model/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0436ff9f279dbf07962b596474c13df283f911811f016e68235226c07c158092 +size 4192 diff --git a/model/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a05b195bcc1fd76038ec948493fe265446db9882 --- /dev/null +++ b/model/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d40f707b73e00084e4b2f450e9e73f030780e6b08f1a1e5ec26e879ea8617f50 +size 8388848 diff --git a/model/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9a46a70664992592a7ae7cee1250df895ff7b84e --- /dev/null +++ b/model/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a251966982f9414f4c7e20d0d596e47fd0c24cfbb6852e339f3cfd938cfe482d +size 25166176 diff --git a/model/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors b/model/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9ec7dd9aa3ed2087d461a52e987aaa6449373b4c --- /dev/null +++ b/model/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c3c553079a10e2d0ad91776ad070165da3082b02fb4b77165cafec3d79ad8b3 +size 4192 diff --git a/model/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9dee9ee2ca3b9bc02f7806b01424f47b88ac7eec --- /dev/null +++ b/model/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbcea22b811bb7c4434117d2785b3b4c25c1dfe2d38be80d4f51feaf75f8a81f +size 33554672 diff --git a/model/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5f8db7917d3f7e4219bbb7c930996ea15eebc046 --- /dev/null +++ b/model/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5931a0012da68afd6a5e092f1b385fc854e29707d6c1b9ce70a9f841fcabe5d0 +size 67109160 diff --git a/model/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors b/model/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..51fbedca3e3d6170e57651436f88879377ba88ad --- /dev/null +++ b/model/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1741a9491e626a804399a2599493b1310db2b044253d65aa758bf7267f0a67dc +size 4192 diff --git a/model/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f3137600092f1b68d81dea37dfc435d800d16f40 --- /dev/null +++ b/model/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e670c343754276fb61ae9959680ec5ccb0ab6989bf880224144c157442b14130 +size 8388848 diff --git a/model/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3511bc3f83f5f95804433d8a4faad4f0ed86f2a6 --- /dev/null +++ b/model/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:095f588265dbf2b3d80cfcfdb3bfca6014007a7954950a74e5863d79663e4916 +size 25166176 diff --git a/model/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors b/model/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2ab4746fa49c89902ece4592ad7d3c0419d48269 --- /dev/null +++ b/model/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fff67b97846515617b0161d06ed06d6faedeedb2141269a4660dc8e564d0c5ba +size 4192 diff --git a/model/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..04dc2af93a92350e6319512a262d2a24d7a48e57 --- /dev/null +++ b/model/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f358b102c0943cfac36fdd97ee224ba2f4e7378d92782d50954217cc2f2ac60 +size 33554672 diff --git a/model/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..443fa7a7f962a4b29d2e2e979d70f7b5e648675e --- /dev/null +++ b/model/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4914277c311a6e6dce6b6635c16cdf0856c831e19b73352dbfa02e34d102f9d +size 67109160 diff --git a/model/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors b/model/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e7a3ac11cfdc3a8c39314662ba4001b7a12331c4 --- /dev/null +++ b/model/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a959a8f6bf4a58ac2a0cc3c95d6e1818cd09bb60f9af19f2666edd2b57ff15a +size 4192 diff --git a/model/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e27f6aa88a6e4305c7595f92d3f88dbe13da0217 --- /dev/null +++ b/model/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4c897e0163e9a7e45feb48889e009b3127e8d34a927505ebea26d1f75930231 +size 8388848 diff --git a/model/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..04f82ce8f91f21031c3b59fd7012453580e749f1 --- /dev/null +++ b/model/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee9783687bda938b859cf444d8dfa82d8ac33bc689764ca0ea8fbfb7d18b85b7 +size 25166176 diff --git a/model/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors b/model/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c6cff90dbb52bc9b44ac25e24cd953d427cf274a --- /dev/null +++ b/model/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fc7a790e901c4f997a80f19e37d1b380605719b0677fa102bc09094ef6f0da3 +size 4192 diff --git a/model/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8059b0d8b91875bd3e4feacecffa0fe2a6bcf716 --- /dev/null +++ b/model/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c19829731bec5f487fdb99a46fe7803e52dc8bb12b1d8e3a4e80d26020c1c006 +size 33554672 diff --git a/model/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..89df4976124b0d0620453d4247b9e131c31cd2c1 --- /dev/null +++ b/model/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07079025b9a3d0f447fcf993be799f55a05b7db43843d205c8c404fb9b6da1d4 +size 67109160 diff --git a/model/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors b/model/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..29b651acd87a80d6cf3bde5db87fd7dd3ace503b --- /dev/null +++ b/model/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17d94d2505f7f07613cfe0433dc449cefc26668412127bb516e34419561b7923 +size 4192 diff --git a/model/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..32f5d729025400033ac24e4d72b1a0abb5e9b728 --- /dev/null +++ b/model/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b57bc659045a6f351d9bd357fb507ecf1da3dfa3b47df10d65688f5cb3ef793 +size 8388848 diff --git a/model/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ddc7f52de541a6d6d5f88257fc00d2358ed186ea --- /dev/null +++ b/model/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78f01cd55e8ec3b8335bf174115162de5a6de4e672bb86a1a7708aae78329d3d +size 25166176 diff --git a/model/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors b/model/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7a8a6c5eb4069e1d680ef78b3083fd97fa6032a7 --- /dev/null +++ b/model/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b407715d88039e79785472cd74596f662bd98ceb01bf4c42379c18d352a4a4a1 +size 4192 diff --git a/model/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..330079f0b1d8c654889f826c111ab105d4967c75 --- /dev/null +++ b/model/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:372be9df80c3db72abfce67b5d1ae53fb9e29b05977409e71cbc954251d3911f +size 33554672 diff --git a/model/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8873ae8b392850e81c50f56c9c5af19954fa7d8e --- /dev/null +++ b/model/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bdd0f89f1c3d0bd519777de364be14460b67752b37c8a97ad93e1b9a6d2cbc6 +size 67109160 diff --git a/model/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors b/model/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1eeda573e5a2ef845cf8e75b4067f420b7706549 --- /dev/null +++ b/model/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ae9c862836a569e807834c4088729188aa79c9d12c63d8c2984941f6fc83919 +size 4192 diff --git a/model/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3a2bfbf19bfe7bf3e8988267710b21c39f199665 --- /dev/null +++ b/model/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:045ade3d42b5f6142cf0d43a644c8721ed786ce884c0b6bef48a30375a0347d0 +size 8388848 diff --git a/model/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a7ab0fe1ca7a87e35e1c2bbf51bf42570ce5ea23 --- /dev/null +++ b/model/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6165668b7c28d54e632f711e7b25256372cb3dde24b04a18c74da1035acb4a2 +size 25166176 diff --git a/model/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors b/model/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..75d723becc5c1a53aacb35838c5fa9424fa99171 --- /dev/null +++ b/model/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69e5a388501a644d5b3dfdd81fb533458b731ec78a749e54f1d5d884e45d3a33 +size 4192 diff --git a/model/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3134b36f7ddff9e0aa534180265289a9746510b6 --- /dev/null +++ b/model/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c86181b7f429b7cf7da48cc5e8f72ff45a12fbc41b2689080efb24889a2a53e5 +size 33554672 diff --git a/model/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e030686a642ae44dd314563fb8f62743e4f10526 --- /dev/null +++ b/model/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0073ccf81d835519e0fc90b0875b31fabb30cd17e0b40c9f757eb75422994fe +size 67109160 diff --git a/model/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors b/model/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bc12796bfdfeb905ebd6420415d4c7ea5d51036e --- /dev/null +++ b/model/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af3633af730992bd0dc33623a02d2363772d059beef4f5c97d8ddbfdd47d4c70 +size 4192 diff --git a/model/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..23b24c4b4600e2d6bb1de0083f65ed044626b33e --- /dev/null +++ b/model/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f98b77d4e9e5b131071925d7ae4de003726664580e28fb5b62695642c283867 +size 8388848 diff --git a/model/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..745343b00ec68dfcc74d02d7f60bb731e8aec417 --- /dev/null +++ b/model/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f70e65e12f225e4cf27cbc731ce32d494bf330440345b33e249222bc4612e1e8 +size 25166176 diff --git a/model/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors b/model/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4f326d500d5f0ddb1d75acdab86df13467962c3e --- /dev/null +++ b/model/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7424f4af8ef8310d5a4a047aa03e59de85fb4095ebde149f8eeec7f61f2fbce +size 4192 diff --git a/model/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e685b24584f92587648f3356ef44ea85ca443f9c --- /dev/null +++ b/model/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:222187d852c2e3ea5d6f8d0af783cb130ca071dafd82e2e94fe41c44b2c86ebd +size 33554672 diff --git a/model/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..63c7cf939731511c47372d7e7deae048ae95e119 --- /dev/null +++ b/model/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d08a099ffa63949e30600b72e8ac078a993061eb5493a0393e1dfa81ca8142d +size 67109160 diff --git a/model/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors b/model/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..826e624334474d9b1530f76303795351879bfdfe --- /dev/null +++ b/model/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fbfd60c6ba1fd1ea4cf09c4160f34d297f7eecb0120d6fd7cfe6f7d33e05e22 +size 4192 diff --git a/model/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a4f1d8132536b777d16042166490c703be38dd50 --- /dev/null +++ b/model/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de7bbb3978de26a36ace70b1e7bf322fbb526051cbb9437cd8321b56c9a7606c +size 8388848 diff --git a/model/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5d9123884aa748606f2bd3d40de0e7b3d8fdccf8 --- /dev/null +++ b/model/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8895b45fb0a2dfb6b6e659556cbf3f3f461619d8be2940e0b6e0b7359464c625 +size 25166176 diff --git a/model/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors b/model/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..caf6fb19b2c15f675edf42ff11dbb98d1f0573e4 --- /dev/null +++ b/model/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9ad0a5844c74395d582ed9604bb57f22d5b3355dda9871f7945288574a21565 +size 4192 diff --git a/model/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..53e3f89161e4e047348941ee9accf3e7d513b480 --- /dev/null +++ b/model/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40f0a0a4ceda4d4a5e793195bc66c698c7e3d3ba73d48ba8c1ac3f6ce72e090a +size 33554672 diff --git a/model/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..346c896361f041eb62fe7da757415108d54b2711 --- /dev/null +++ b/model/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d858686db0582b21a17ec31c1256dce487c85ce5adcf8ebecb85b3f6c986d584 +size 67109160 diff --git a/model/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors b/model/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..10247bb57d2c5bbcfefe06589f87fc000a7a4085 --- /dev/null +++ b/model/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2756b53a8b783ec5a852088a88409d832dfcb93644ae6eae2633dfb4b1e16684 +size 4192 diff --git a/model/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a607da27098f784b1820b3f454d4ee996a5912e5 --- /dev/null +++ b/model/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d54069e4c0d46881d6725c973b11a8c6ac314e45b29cc4d278cac2ce7db8fe1 +size 8388848 diff --git a/model/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d38f8dca2d40ea6e35b14a56c79627e3c64b2930 --- /dev/null +++ b/model/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c220ca202cdd97f648caa45de0f7b99cea621d72b8f57e5621a2244f42da97c +size 25166176 diff --git a/model/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors b/model/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c21e7158864e8b28f3c413c7d892c6038b19733b --- /dev/null +++ b/model/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb04cc6544d1d456714f45cf4cb39e2ae4aea5fef7da4b73e7ccbc2ba911a6c2 +size 4192 diff --git a/model/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..15bc5a4bc1b3212120d14f3ba6413855d538eba5 --- /dev/null +++ b/model/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6711807cc409cc531f124171f625cea60668b269fef59794864907d76916a883 +size 33554672 diff --git a/model/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8b23a73fa1ec75e6871bf139c91c4c7a8bbf444d --- /dev/null +++ b/model/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:379b1ab4a6953eca6fdb6ff6a984c2b258c9c5e2b2799aef36ef2360b34ccdbf +size 67109160 diff --git a/model/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors b/model/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..02e3e917d0d1314a34b41fb5380c54d77cf4753c --- /dev/null +++ b/model/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fc6010ac7a1306c1248523495b835932aa478808bc563ebc3d59099c6a962d4 +size 4192 diff --git a/model/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..22ca4e2c6140ca9a86ae381fd833fcfd8cb0c7b5 --- /dev/null +++ b/model/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b963c3ebc1d10791f266f8fc41dd13045929d178d6ce6f289d4018b88846ad05 +size 8388848 diff --git a/model/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4f10167e23fe4c80054bb0a62ef4ded091672932 --- /dev/null +++ b/model/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8c4216179470967b8d6b69863d9928ce6aa590cc28f534fe6d7ba4da4a25d5a +size 25166176 diff --git a/model/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors b/model/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9f4efe8f8ca6dfda8c01b3598f4b8155c5f2d5f3 --- /dev/null +++ b/model/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2570bd4ef4e5dee9eef31f20fa59507e0ae5c13521785c80e4c7bbaa0b0a497f +size 4192 diff --git a/model/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7dcf3f1d6078e5b4688a27e187b8810c9778db83 --- /dev/null +++ b/model/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cc329d0b119f506e8d79efc36371acdc9cd6d46f9c4f92838bf44630389489c +size 33554672 diff --git a/model/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..70d1ba9d0fc10e55ba59a665d53d300160c8776f --- /dev/null +++ b/model/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b90bb2f4d31a15c9c6079cf2937add1e375f1a5735cad427175686021e0a519a +size 67109160 diff --git a/model/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors b/model/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7ab7e16ec295999ef198731c133507cf0a35b33e --- /dev/null +++ b/model/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:795bfce3e786ef5914e354fe758eb13249ca5e4b63ccd1a0c4727ac20e5ea3b0 +size 4192 diff --git a/model/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3ec69772fda260e72cdc6ef01f0d3c5752e50a1f --- /dev/null +++ b/model/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4c7ac47d90dc7b2d7b4adf8cfa7a67fd59c09f674c1797fe1b6776df6e79548 +size 8388848 diff --git a/model/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c25d37ca7bb32134b491e2c3cce95992a5bc9c7c --- /dev/null +++ b/model/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b0b1d72aa73aa16118aca8fd97f669e710cf6aea518f1caf505bf7beb1c604e +size 25166176 diff --git a/model/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors b/model/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3cabab78c03ed624343584aa84c7c206d67637a7 --- /dev/null +++ b/model/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c767135daf4f04f0f313fa604bdb9c6a9c159d7a6a5767e0c684c35094de0f1d +size 4192 diff --git a/model/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9f397369c21bd998edc464240494155d11af6b8e --- /dev/null +++ b/model/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61f9b58098ec51fe0fa37320796540ca4ac5c8bbf7e84f3ecfcbdda089077e28 +size 33554672 diff --git a/model/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..037b8961b2ba2b883f9d075df65b93cc9b0bf504 --- /dev/null +++ b/model/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2486c190b8121c2b1f217455737c59278900bffbdaf4893443e7ae3f72d29cde +size 67109160 diff --git a/model/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors b/model/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f793d2a5927ae6f244270a752adc13b610e57247 --- /dev/null +++ b/model/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acdd4187967616ac7f3318c093beb29aace0cea007da054512208429516b53f5 +size 4192 diff --git a/model/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..026c0b335670dca279e59decbb6cdd35da3ef28d --- /dev/null +++ b/model/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cf99defffba655820463ad3032554b099ed70de1bf90c696d30ea5b0fde3a28 +size 8388848 diff --git a/model/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5de69b65a4a283547db81f5c7b855e8164ee3d4a --- /dev/null +++ b/model/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17b0267f58503bf0955eaa31d488835de38192c3647ca385c2fb6f54fc952240 +size 25166176 diff --git a/model/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors b/model/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..84cabafddfe50490e4a11804ae14801ba2fab018 --- /dev/null +++ b/model/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e74a40d549a12481bb2d6628b160f116e4a730a3be036ff681ac99d5317acf7 +size 4192 diff --git a/model/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c028a896f2cf1b937a64106327f99d924c1a7a58 --- /dev/null +++ b/model/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:121fd81f5fc7aa2fce9a7d57c7f2baaee5fa1f66bd5eb99616942176d0deb973 +size 33554672 diff --git a/model/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..18f0e63413e272656638089c546b41da6f06e477 --- /dev/null +++ b/model/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7dbde81833f16c03de0552827e093d9034ab379191ac6c7e141c1c4b29a82283 +size 67109160 diff --git a/model/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors b/model/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..925f577ab7a377ca4d484e8cba72c13f253894b4 --- /dev/null +++ b/model/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cae9fdd453ab181df44fef875fb877081234f9fe0277dcb9ec0524fce98bd8c +size 4192 diff --git a/model/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ff34683dca13a7b6d4bcdc2f57cf12ceded15c4c --- /dev/null +++ b/model/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a6afbddbf5d518ff866358e81eabc31efb00d779ac7d0916736aeca46025ae1 +size 8388848 diff --git a/model/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9fb412205a9089b80f527b823552675cf1a695ea --- /dev/null +++ b/model/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a9e6ac7334676cc226daf7b8fbf1327914a4556c4f1445a45b82894f04a034f +size 25166176 diff --git a/model/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors b/model/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..908e30d6e85f0154510342fb5eb7d57fe4002a62 --- /dev/null +++ b/model/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55873dad1a176fb2e4627a35e5dd81d82a140de084adcf458a604bc62cc3c253 +size 4192 diff --git a/model/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6f3c12904796de0933d71534bd2eb5a84f718bc0 --- /dev/null +++ b/model/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed298e0f2f823f3c97627bc6431f710f803b7b12e616e66a7eac39eaa64fdae8 +size 33554672 diff --git a/model/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..138747e5ed2f78d4ac94c7eb8024ec7931a45b8a --- /dev/null +++ b/model/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56522c1b835f745b48500bc61423c0b58ebd1359ba8e8d36a9c9eca4a1e209a2 +size 67109160 diff --git a/model/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors b/model/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d30e41d16d2f0d148e5b0eb4c24cc0cce5db7f7f --- /dev/null +++ b/model/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9adbfefcd23ea5e2f5897d0fefc3e74daf410b034564d37edcd181591d9c85a0 +size 4192 diff --git a/model/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6fc3fd3db53d38fcf43ce6821a6f398b7fcd6dc1 --- /dev/null +++ b/model/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c3083b7ee292addf23be2115313ef4d9782a9c3d16014c5b9742661c2849c66 +size 8388848 diff --git a/model/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors b/model/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c5764ea9ffaddf5070709f00abdece356cd1d0f9 --- /dev/null +++ b/model/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf5d3346d7ca23584fa2afe92da550e80fbdb6e14d6ca735f51e0ad386ed2936 +size 4192 diff --git a/model/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors b/model/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a3ebc1b4f7536ee9fba53793a509c2ae520d335d --- /dev/null +++ b/model/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0be24ffe3f7501e326178c1251457fed04ee49d202388036721e1e227cc2b6b9 +size 4192 diff --git a/model/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..555d722f01455cbc739bd56b692295a03327c87d --- /dev/null +++ b/model/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04f91e701d3811fc80092de250dffa487812a4809d8ca7597945e8402750db51 +size 8388848 diff --git a/model/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c4901da8c3816c83dba698a33d27690946a3bdd5 --- /dev/null +++ b/model/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:985c59b1f3c24f45d7cfb55aad2a6eca6ad101f2961b569a3239ae8736fa4d9c +size 25166176 diff --git a/model/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors b/model/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7712e43362d4eb497a7fb6a5157b4c48a4474980 --- /dev/null +++ b/model/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e08ebf61300a56dd7ff74c8e3e4f0c7d339e7b07734cb8cba8be39f6b3c06c68 +size 4192 diff --git a/model/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b4173b9f44ff194c769e62b773929814780fe62e --- /dev/null +++ b/model/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63d34d89c44c76816b54c033aece743a206ebac0fe0ba119a8e796b900aa4958 +size 33554672 diff --git a/model/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0895a9214408d8fa6b2f5377d0ef54c363bd651a --- /dev/null +++ b/model/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e63caf36de8785e1f4b8efbb8230954f3d604f2a92ae6c45af11e3b123186a57 +size 67109160 diff --git a/model/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors b/model/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..06f9a5b52ba49f4d2def2c1055dec95991ac5f8e --- /dev/null +++ b/model/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f485e708abef3ba79a16cbebb0b86c900cbe2d72d2f194717fbb573953891ff +size 4192 diff --git a/model/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..045da6fcee790ce9b127cb683234e126d7530c93 --- /dev/null +++ b/model/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b57c14a70698130caf515f50c509c926f5c496c29743c38e64e235e6bfec9fe +size 8388848 diff --git a/model/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4d3d98dbcbedc9da533b15522b36f61ede181e9e --- /dev/null +++ b/model/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:255f5043da271573a7c15ee32fefbae53a606cf8441117c884a8544e98ed1dca +size 25166176 diff --git a/model/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors b/model/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bbffbf0ed5840aa4bd16ef66cf1fb20e7f665259 --- /dev/null +++ b/model/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9b65cb3d5e473a91ecb5661a7e6021599f8eb039ddb8ed69ffdafb6396bd703 +size 4192 diff --git a/model/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..41609761be1b5533aeb35a2eddafc6aff4a8ecd7 --- /dev/null +++ b/model/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5402afd1efd521bad89da7a13623e52e8687fc893e5ebd620a73a8c779b67204 +size 33554672 diff --git a/model/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6ce2bf3ce129e37c1ec00a5c30ea717d1929cedf --- /dev/null +++ b/model/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa6e494540cf308c86549850dd4546545e47f9a697f8a75b16458574d97e5aab +size 67109160 diff --git a/model/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors b/model/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..30a70cad2311186b3e0ce44120bb92a2a380a977 --- /dev/null +++ b/model/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c62498c21e6b9fb88f6c83ee9fbe6cda7a685bbfd2c74d060c12acaa030a401 +size 4192 diff --git a/model/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c57b291f4bdc1e2cff341bc18bfc7ac1a892f88f --- /dev/null +++ b/model/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cc554922d42257cdcee1acb08ca4336c2fcec99cd6f45440f2d87bf62c3afbd +size 8388848 diff --git a/model/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..876079f76d5aafc05b1fb8102b87785157e86a70 --- /dev/null +++ b/model/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6bc84856fc4c73918a00b8ac9048b5e9f15d21ead1b537da69a99116e33c331 +size 25166176 diff --git a/model/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors b/model/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b6ef71615e86298cfeba24ea63399f864ec7c5b7 --- /dev/null +++ b/model/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31d82a754f68a38bd4a7e4530e78a9d9398cbded5491ee2a10bb1a306cfa01ac +size 4192 diff --git a/model/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c498ad7be104344f9d98ff3bd6897f089f5d95ca --- /dev/null +++ b/model/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b7d5bdfd7f3b8ffacd6a7cdf8147242c2c020c7a237c884af4bd601791bd3b0 +size 33554672 diff --git a/model/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ef129f4bf4e362ef199e2e2ba1afc76ecd2c0281 --- /dev/null +++ b/model/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6dd14b5cd3125ecddfbb33f4f7397763c46c4513275084112062376b2f5614ed +size 67109160 diff --git a/model/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors b/model/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b0d50f32d4a7aff58388acb2aeab0aec4162aebf --- /dev/null +++ b/model/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cb0b32b2af6201863558b4267a44417b144aeba6c1fd7805094fc5a1fdef956 +size 4192 diff --git a/model/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d7f55288a465ebf07eac08e664fcf52f258abecc --- /dev/null +++ b/model/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4d97f3d324f90de57eb5631082137d1c39bc358561f618bb387d52ce08eb576 +size 8388848 diff --git a/model/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0c37c489e05270abfcd883c63496506a8c88f62f --- /dev/null +++ b/model/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:418aeeef71dac8b8be3d11a580b4df7ddfc081662231c13438d8808198ff4c6c +size 25166176 diff --git a/model/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors b/model/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c3680071cd64adf287de0850b0e1034e8ef27076 --- /dev/null +++ b/model/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6898dbfc56b7818878f2ddf941566f8d852a66096cf0fef9597270e4acf7ce91 +size 4192 diff --git a/model/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3025af688ead178a8405ad4acd39ac2c82e3597a --- /dev/null +++ b/model/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5423e3ff2bf1a1404e6b4d3a0eb6931444ed4944b7aed7c98ac8baeb17cf189 +size 33554672 diff --git a/model/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e8373d32369bcee6f171344c5797e1b0cdc13c81 --- /dev/null +++ b/model/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f4800db359c721f41ae6fe2ff291871706c15cafb61e9a3789e5dfac21dde73 +size 67109160 diff --git a/model/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors b/model/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7d7731a4440b597fc76694d6178895375147a43e --- /dev/null +++ b/model/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68d98ee98c91825fb813cdf237ca8ddafa76f6a51fbfbcf982934ce09ca7bced +size 4192 diff --git a/model/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1e84cf3207ad290e326f1b3c6849a6f0fd7bd6c8 --- /dev/null +++ b/model/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdf90ff8931a703011105b053d64946c9d4047253ca0726b23a47e853cad9f45 +size 8388848 diff --git a/model/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7e9e598b0cefe8492d3c56e5492e4f07c505b32a --- /dev/null +++ b/model/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9f99cf9001c159f4937e7f9ef11cb3fff5b98419bd8dd5f3659f66b52903f53 +size 25166176 diff --git a/model/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors b/model/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fa50f592ec9aba5f6f686c72bfde67f8e4d32e7e --- /dev/null +++ b/model/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:526a2aaf7717e70095ba5baeddda321a5a7a9a7fd7c545b4f0a66a14f9ed4b23 +size 4192 diff --git a/model/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5d4c1ab06ee21d459dd5cdf482bac7c2a218238a --- /dev/null +++ b/model/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c567df0678d9e26189b064e35d956bf4846554e5960c2839401533d892a5b4df +size 33554672 diff --git a/model/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..287d9ca7c4a83972dd0d05a66ae38a0e1472c935 --- /dev/null +++ b/model/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ad271b6adb65b36f30ab0d47ba8d4b68e34103f09d734bb6deb73fd005bc5ce +size 67109160 diff --git a/model/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors b/model/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1ee263b07c074e3d4458f2a0f8d636c760c5e167 --- /dev/null +++ b/model/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cfbae535ee4594af78dd3553e33ba0525aa56bad1cd3ed9091095807c22aec2 +size 4192 diff --git a/model/model/final_layer_norm/pp_block/model_weight.safetensors b/model/model/final_layer_norm/pp_block/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..08088aec6217ba9da419b3f0084c7faf997b7b16 --- /dev/null +++ b/model/model/final_layer_norm/pp_block/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0be727482e89ed3264be5c4549b14aa8ec75a2bee4189c557fff33dd54d99612 +size 4192 diff --git a/model/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..665ae0e13eb31c6fe69caa1e82e8a5f1022d10fc --- /dev/null +++ b/model/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:983b435300694f4ec2289788141ddd3dd0229710a1c08c4909ab9a3133596a05 +size 201326832 diff --git a/model_config.json b/model_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b22858b14f5b51300266367985c0691b047defa0 --- /dev/null +++ b/model_config.json @@ -0,0 +1 @@ +{"bos_token_id": 0, "eos_token_id": 0, "hidden_act": "silu", "hidden_size": 2048, "initializer_range": 0.02, "intermediate_size": 8192, "is_llama_config": true, "max_position_embeddings": 2048, "num_attention_heads": 32, "num_hidden_layers": 24, "num_key_value_heads": 32, "pad_token_id": null, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "tie_word_embeddings": true, "use_cache": true, "vocab_size": 49152} \ No newline at end of file diff --git a/optimizer/optimizer_config.json b/optimizer/optimizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b43a810ed79747ad6c7db726a71391eb39c2d223 --- /dev/null +++ b/optimizer/optimizer_config.json @@ -0,0 +1 @@ +{"type": "OptimizerFromGradientAccumulator", "parallelism": {"tp_size": "1", "dp_size": "256", "pp_size": "1", "expert_parallel_size": "1"}, "configs": {}} \ No newline at end of file diff --git a/random/tp-0-of-1_dp-121-of-256_pp-0-of-1.pt b/random/tp-0-of-1_dp-121-of-256_pp-0-of-1.pt new file mode 100644 index 0000000000000000000000000000000000000000..650f80d1b766c0fb36a8cd2eb1ff4ae05aebff2f --- /dev/null +++ b/random/tp-0-of-1_dp-121-of-256_pp-0-of-1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47f5b4e75927abc40d07eb3a57a1491f79caa9d7ba49064759164afcf4637350 +size 1096 diff --git a/random/tp-0-of-1_dp-169-of-256_pp-0-of-1.pt b/random/tp-0-of-1_dp-169-of-256_pp-0-of-1.pt new file mode 100644 index 0000000000000000000000000000000000000000..0ed8f4328a712af53589fc1a95c11a0735e6ba58 --- /dev/null +++ b/random/tp-0-of-1_dp-169-of-256_pp-0-of-1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e4538bbe3ddca7f02aec6857fb7f875a9637782200789a0f9ac37f1a5b08441 +size 1096 diff --git a/random/tp-0-of-1_dp-174-of-256_pp-0-of-1.pt b/random/tp-0-of-1_dp-174-of-256_pp-0-of-1.pt new file mode 100644 index 0000000000000000000000000000000000000000..16f73f0d3f99118bd409e3d0f0be758ea65a3e52 --- /dev/null +++ b/random/tp-0-of-1_dp-174-of-256_pp-0-of-1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:788289f9da769551ad48051a0df5d5e981b56d8eb291b3cb82e283f4352afab3 +size 1096 diff --git a/random/tp-0-of-1_dp-91-of-256_pp-0-of-1.pt b/random/tp-0-of-1_dp-91-of-256_pp-0-of-1.pt new file mode 100644 index 0000000000000000000000000000000000000000..d61e446eabc0233b064cc642747d7d880a165c8c --- /dev/null +++ b/random/tp-0-of-1_dp-91-of-256_pp-0-of-1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3cb1c1385ead54408a9c016c18a27ac27aa6ecc3e843e0a7fb7b4f5501f0a53 +size 1092