[llama]
model_name = internlm-chat
head_num = 40
kv_head_num = 40
size_per_head = 128
vocab_size = 103168
num_layer = 60
rotary_embedding = 128
rope_theta = 10000.0
inter_size = 13824
norm_eps = 1e-06
attn_bias = 0
start_id = 1
end_id = 2
weight_type = int4
group_size = 128
max_batch_size = 32
max_context_token_num = 4
session_len = 2056
step_length = 1
cache_max_entry_count = 48
cache_chunk_size = 1
use_context_fmha = 1
quant_policy = 0
tensor_para_size = 1
max_position_embeddings = 0
use_dynamic_ntk = 0
use_logn_attn = 0
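Since this is a standard INI file, it can be inspected or adjusted programmatically before redeploying. Below is a minimal sketch (not part of LMDeploy itself) using Python's built-in configparser; the file name "config.ini" and the edited key are illustrative assumptions.

    # Sketch: read and tweak the TurboMind config with configparser.
    # Assumes the config above is saved as "config.ini" in the working directory.
    import configparser

    cfg = configparser.ConfigParser()
    cfg.read("config.ini")

    llama = cfg["llama"]
    print(llama["model_name"])           # -> internlm-chat
    print(int(llama["max_batch_size"]))  # -> 32

    # Hypothetical edit: shrink the KV-cache entry count before redeploying.
    llama["cache_max_entry_count"] = "24"
    with open("config.ini", "w") as f:
        cfg.write(f)

Note that configparser returns every value as a string, so numeric fields such as max_batch_size or norm_eps must be cast explicitly by the caller.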