{
  "embedding_config": {
    "type": "TiedEmbeddingConfig",
    "input_scale": null,
    "logits_soft_cap": null,
    "precision": "float16"
  },
  "rope_config": {
    "type": "LlamaRoPEConfig",
    "precision": "float16",
    "base": 500000.0,
    "max_sequence_length": 131072,
    "scaling_factor": 32.0,
    "original_context_length": 8192,
    "low_frequency_factor": 1.0,
    "high_frequency_factor": 4.0
  },
  "layer_config": {
    "pre_attention_norm_config": {
      "scale_precision": "float16",
      "accumulation_precision": "float32",
      "epsilon": 1e-05
    },
    "attention_config": {
      "qkv_projection_config": {
        "type": "FullPrecisionLinearConfig",
        "precision": "float16"
      },
      "out_projection_config": {
        "type": "FullPrecisionLinearConfig",
        "precision": "float16"
      },
      "logit_soft_cap": null,
      "has_qkv_biases": false,
      "has_out_biases": false
    },
    "post_attention_norm_config": null,
    "pre_mlp_norm_config": {
      "scale_precision": "float16",
      "accumulation_precision": "float32",
      "epsilon": 1e-05
    },
    "mlp_config": {
      "linear_config": {
        "type": "FullPrecisionLinearConfig",
        "precision": "float16"
      },
      "activation": "silu"
    },
    "post_mlp_norm_config": null
  },
  "output_norm_config": {
    "scale_precision": "float16",
    "accumulation_precision": "float32",
    "epsilon": 1e-05
  },
  "vocab_size": 128256,
  "model_dim": 2048,
  "hidden_dim": 8192,
  "num_heads": 32,
  "num_groups": 8,
  "head_dim": 64,
  "attention_scale": null,
  "num_layers": 16,
  "sliding_window_sizes": null,
  "context_length": 8192
}