program(1.3) [buildInfo = dict({{"coremlc-component-MIL", "3404.16.1"}, {"coremlc-version", "3404.23.1"}})] { func infer(tensor causal_mask, tensor current_pos, tensor hidden_states, state> model_model_kv_cache_0, tensor position_ids) { tensor model_model_layers_4_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12583040))))[name = string("model_model_layers_4_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_4_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12648640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15794432))))[name = string("model_model_layers_4_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_4_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15810880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18956672))))[name = string("model_model_layers_4_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_4_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18973120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63013376))))[name = string("model_model_layers_4_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_4_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63242816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107283072))))[name = string("model_model_layers_4_mlp_up_proj_weight_palettized")]; tensor model_model_layers_4_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107512512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151552768))))[name = string("model_model_layers_4_mlp_down_proj_weight_palettized")]; tensor model_model_layers_5_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151618368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(164201344))))[name = string("model_model_layers_5_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_5_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(164266944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167412736))))[name = string("model_model_layers_5_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_5_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167429184))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170574976))))[name = string("model_model_layers_5_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_5_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170591424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214631680))))[name = string("model_model_layers_5_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_5_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214861120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(258901376))))[name = string("model_model_layers_5_mlp_up_proj_weight_palettized")]; tensor model_model_layers_5_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(259130816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303171072))))[name = string("model_model_layers_5_mlp_down_proj_weight_palettized")]; tensor model_model_layers_6_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303236672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315819648))))[name = string("model_model_layers_6_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_6_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315885248))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319031040))))[name = string("model_model_layers_6_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_6_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319047488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322193280))))[name = string("model_model_layers_6_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_6_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322209728))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366249984))))[name = string("model_model_layers_6_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_6_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366479424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410519680))))[name = string("model_model_layers_6_mlp_up_proj_weight_palettized")]; tensor model_model_layers_6_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410749120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(454789376))))[name = string("model_model_layers_6_mlp_down_proj_weight_palettized")]; tensor model_model_layers_7_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(454854976))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(467437952))))[name = string("model_model_layers_7_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_7_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(467503552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(470649344))))[name = string("model_model_layers_7_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_7_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(470665792))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(473811584))))[name = string("model_model_layers_7_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_7_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(473828032))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(517868288))))[name = string("model_model_layers_7_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_7_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(518097728))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562137984))))[name = string("model_model_layers_7_mlp_up_proj_weight_palettized")]; tensor model_model_layers_7_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562367424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(606407680))))[name = string("model_model_layers_7_mlp_down_proj_weight_palettized")]; int32 var_38 = const()[name = string("op_38"), val = int32(-1)]; int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; tensor greater_equal_0 = greater_equal(x = current_pos, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(131072)]; tensor add_0 = add(x = current_pos, y = slice_by_index_0)[name = string("add_0")]; tensor select_0 = select(a = current_pos, b = add_0, cond = greater_equal_0)[name = string("select_0")]; int32 var_147_axis_0 = const()[name = string("op_147_axis_0"), val = int32(1)]; int32 var_147_batch_dims_0 = const()[name = string("op_147_batch_dims_0"), val = int32(0)]; bool var_147_validate_indices_0 = const()[name = string("op_147_validate_indices_0"), val = bool(false)]; tensor var_43_to_fp16 = const()[name = string("op_43_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(606473280)))]; tensor var_147_cast_fp16 = gather(axis = var_147_axis_0, batch_dims = var_147_batch_dims_0, indices = select_0, validate_indices = var_147_validate_indices_0, x = var_43_to_fp16)[name = string("op_147_cast_fp16")]; tensor var_148 = const()[name = string("op_148"), val = tensor([1, 1, 1, -1])]; tensor sin_1_cast_fp16 = reshape(shape = var_148, x = var_147_cast_fp16)[name = string("sin_1_cast_fp16")]; int32 var_152_axis_0 = const()[name = string("op_152_axis_0"), val = int32(1)]; int32 var_152_batch_dims_0 = const()[name = string("op_152_batch_dims_0"), val = int32(0)]; bool var_152_validate_indices_0 = const()[name = string("op_152_validate_indices_0"), val = bool(false)]; tensor var_37_to_fp16 = const()[name = string("op_37_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(640027776)))]; tensor var_152_cast_fp16 = gather(axis = var_152_axis_0, batch_dims = var_152_batch_dims_0, indices = select_0, validate_indices = var_152_validate_indices_0, x = var_37_to_fp16)[name = string("op_152_cast_fp16")]; tensor var_153 = const()[name = string("op_153"), val = tensor([1, 1, 1, -1])]; tensor cos_1_cast_fp16 = reshape(shape = var_153, x = var_152_cast_fp16)[name = string("cos_1_cast_fp16")]; tensor mean_1_axes_0 = const()[name = string("mean_1_axes_0"), val = tensor([-1])]; bool mean_1_keep_dims_0 = const()[name = string("mean_1_keep_dims_0"), val = bool(true)]; tensor mean_1_cast_fp16 = reduce_mean(axes = mean_1_axes_0, keep_dims = mean_1_keep_dims_0, x = hidden_states)[name = string("mean_1_cast_fp16")]; tensor input_1_cast_fp16 = sub(x = hidden_states, y = mean_1_cast_fp16)[name = string("input_1_cast_fp16")]; tensor var_161_axes_0 = const()[name = string("op_161_axes_0"), val = tensor([-1])]; tensor model_model_layers_4_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_4_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(673582272)))]; fp16 var_33_to_fp16 = const()[name = string("op_33_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_161_cast_fp16 = layer_norm(axes = var_161_axes_0, epsilon = var_33_to_fp16, gamma = model_model_layers_4_input_layernorm_weight_to_fp16, x = input_1_cast_fp16)[name = string("op_161_cast_fp16")]; tensor var_164 = const()[name = string("op_164"), val = tensor([0, 2, 1])]; tensor var_166_axes_0 = const()[name = string("op_166_axes_0"), val = tensor([2])]; tensor var_165 = transpose(perm = var_164, x = var_161_cast_fp16)[name = string("transpose_15")]; tensor var_166 = expand_dims(axes = var_166_axes_0, x = var_165)[name = string("op_166")]; string var_173_pad_type_0 = const()[name = string("op_173_pad_type_0"), val = string("valid")]; tensor var_173_strides_0 = const()[name = string("op_173_strides_0"), val = tensor([1, 1])]; tensor var_173_pad_0 = const()[name = string("op_173_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_173_dilations_0 = const()[name = string("op_173_dilations_0"), val = tensor([1, 1])]; int32 var_173_groups_0 = const()[name = string("op_173_groups_0"), val = int32(1)]; tensor var_173 = conv(dilations = var_173_dilations_0, groups = var_173_groups_0, pad = var_173_pad_0, pad_type = var_173_pad_type_0, strides = var_173_strides_0, weight = model_model_layers_4_self_attn_q_proj_weight_palettized, x = var_166)[name = string("op_173")]; tensor var_174 = const()[name = string("op_174"), val = tensor([1, 32, 1, 128])]; tensor var_175 = reshape(shape = var_174, x = var_173)[name = string("op_175")]; string var_182_pad_type_0 = const()[name = string("op_182_pad_type_0"), val = string("valid")]; tensor var_182_strides_0 = const()[name = string("op_182_strides_0"), val = tensor([1, 1])]; tensor var_182_pad_0 = const()[name = string("op_182_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_182_dilations_0 = const()[name = string("op_182_dilations_0"), val = tensor([1, 1])]; int32 var_182_groups_0 = const()[name = string("op_182_groups_0"), val = int32(1)]; tensor var_182 = conv(dilations = var_182_dilations_0, groups = var_182_groups_0, pad = var_182_pad_0, pad_type = var_182_pad_type_0, strides = var_182_strides_0, weight = model_model_layers_4_self_attn_k_proj_weight_palettized, x = var_166)[name = string("op_182")]; tensor var_183 = const()[name = string("op_183"), val = tensor([1, 8, 1, 128])]; tensor var_184 = reshape(shape = var_183, x = var_182)[name = string("op_184")]; string var_191_pad_type_0 = const()[name = string("op_191_pad_type_0"), val = string("valid")]; tensor var_191_strides_0 = const()[name = string("op_191_strides_0"), val = tensor([1, 1])]; tensor var_191_pad_0 = const()[name = string("op_191_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_191_dilations_0 = const()[name = string("op_191_dilations_0"), val = tensor([1, 1])]; int32 var_191_groups_0 = const()[name = string("op_191_groups_0"), val = int32(1)]; tensor var_191 = conv(dilations = var_191_dilations_0, groups = var_191_groups_0, pad = var_191_pad_0, pad_type = var_191_pad_type_0, strides = var_191_strides_0, weight = model_model_layers_4_self_attn_v_proj_weight_palettized, x = var_166)[name = string("op_191")]; tensor var_192 = const()[name = string("op_192"), val = tensor([1, 8, 1, 128])]; tensor var_193 = reshape(shape = var_192, x = var_191)[name = string("op_193")]; tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 32, 1, 64])]; tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_1 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = var_175)[name = string("x1_1")]; tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 32, 1, 128])]; tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_1 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = var_175)[name = string("x2_1")]; tensor cos_3_begin_0 = const()[name = string("cos_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor cos_3_end_0 = const()[name = string("cos_3_end_0"), val = tensor([1, 1, 1, 64])]; tensor cos_3_end_mask_0 = const()[name = string("cos_3_end_mask_0"), val = tensor([true, true, true, false])]; tensor cos_3_cast_fp16 = slice_by_index(begin = cos_3_begin_0, end = cos_3_end_0, end_mask = cos_3_end_mask_0, x = cos_1_cast_fp16)[name = string("cos_3_cast_fp16")]; tensor sin_3_begin_0 = const()[name = string("sin_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor sin_3_end_0 = const()[name = string("sin_3_end_0"), val = tensor([1, 1, 1, 64])]; tensor sin_3_end_mask_0 = const()[name = string("sin_3_end_mask_0"), val = tensor([true, true, true, false])]; tensor sin_3_cast_fp16 = slice_by_index(begin = sin_3_begin_0, end = sin_3_end_0, end_mask = sin_3_end_mask_0, x = sin_1_cast_fp16)[name = string("sin_3_cast_fp16")]; tensor var_207_cast_fp16 = mul(x = x1_1, y = cos_3_cast_fp16)[name = string("op_207_cast_fp16")]; tensor var_208_cast_fp16 = mul(x = x2_1, y = sin_3_cast_fp16)[name = string("op_208_cast_fp16")]; tensor var_209_cast_fp16 = sub(x = var_207_cast_fp16, y = var_208_cast_fp16)[name = string("op_209_cast_fp16")]; tensor var_210_cast_fp16 = mul(x = x2_1, y = cos_3_cast_fp16)[name = string("op_210_cast_fp16")]; tensor var_211_cast_fp16 = mul(x = x1_1, y = sin_3_cast_fp16)[name = string("op_211_cast_fp16")]; tensor var_212_cast_fp16 = add(x = var_210_cast_fp16, y = var_211_cast_fp16)[name = string("op_212_cast_fp16")]; bool rotated_1_interleave_0 = const()[name = string("rotated_1_interleave_0"), val = bool(false)]; tensor rotated_1_cast_fp16 = concat(axis = var_38, interleave = rotated_1_interleave_0, values = (var_209_cast_fp16, var_212_cast_fp16))[name = string("rotated_1_cast_fp16")]; tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_3 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = var_184)[name = string("x1_3")]; tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_3 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = var_184)[name = string("x2_3")]; tensor var_228_cast_fp16 = mul(x = x1_3, y = cos_3_cast_fp16)[name = string("op_228_cast_fp16")]; tensor var_229_cast_fp16 = mul(x = x2_3, y = sin_3_cast_fp16)[name = string("op_229_cast_fp16")]; tensor var_230_cast_fp16 = sub(x = var_228_cast_fp16, y = var_229_cast_fp16)[name = string("op_230_cast_fp16")]; tensor var_231_cast_fp16 = mul(x = x2_3, y = cos_3_cast_fp16)[name = string("op_231_cast_fp16")]; tensor var_232_cast_fp16 = mul(x = x1_3, y = sin_3_cast_fp16)[name = string("op_232_cast_fp16")]; tensor var_233_cast_fp16 = add(x = var_231_cast_fp16, y = var_232_cast_fp16)[name = string("op_233_cast_fp16")]; bool rotated_3_interleave_0 = const()[name = string("rotated_3_interleave_0"), val = bool(false)]; tensor rotated_3_cast_fp16 = concat(axis = var_38, interleave = rotated_3_interleave_0, values = (var_230_cast_fp16, var_233_cast_fp16))[name = string("rotated_3_cast_fp16")]; int32 var_237 = const()[name = string("op_237"), val = int32(1)]; tensor var_238 = add(x = current_pos, y = var_237)[name = string("op_238")]; tensor read_state_0 = read_state(input = model_model_kv_cache_0)[name = string("read_state_0")]; tensor expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor([4])]; tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor([0])]; tensor expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor([0])]; tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor([5])]; int32 concat_2_axis_0 = const()[name = string("concat_2_axis_0"), val = int32(0)]; bool concat_2_interleave_0 = const()[name = string("concat_2_interleave_0"), val = bool(false)]; tensor concat_2 = concat(axis = concat_2_axis_0, interleave = concat_2_interleave_0, values = (expand_dims_0, expand_dims_1, current_pos, expand_dims_3))[name = string("concat_2")]; tensor concat_3_values1_0 = const()[name = string("concat_3_values1_0"), val = tensor([0])]; tensor concat_3_values3_0 = const()[name = string("concat_3_values3_0"), val = tensor([0])]; int32 concat_3_axis_0 = const()[name = string("concat_3_axis_0"), val = int32(0)]; bool concat_3_interleave_0 = const()[name = string("concat_3_interleave_0"), val = bool(false)]; tensor concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (expand_dims_4, concat_3_values1_0, var_238, concat_3_values3_0))[name = string("concat_3")]; tensor model_model_kv_cache_0_internal_tensor_assign_1_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_1_stride_0, update = rotated_3_cast_fp16, x = read_state_0)[name = string("model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_0_write_state")]; tensor coreml_update_state_8 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_0")]; tensor expand_dims_6 = const()[name = string("expand_dims_6"), val = tensor([36])]; tensor expand_dims_7 = const()[name = string("expand_dims_7"), val = tensor([0])]; tensor expand_dims_9 = const()[name = string("expand_dims_9"), val = tensor([0])]; tensor expand_dims_10 = const()[name = string("expand_dims_10"), val = tensor([37])]; int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)]; bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)]; tensor concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (expand_dims_6, expand_dims_7, current_pos, expand_dims_9))[name = string("concat_6")]; tensor concat_7_values1_0 = const()[name = string("concat_7_values1_0"), val = tensor([0])]; tensor concat_7_values3_0 = const()[name = string("concat_7_values3_0"), val = tensor([0])]; int32 concat_7_axis_0 = const()[name = string("concat_7_axis_0"), val = int32(0)]; bool concat_7_interleave_0 = const()[name = string("concat_7_interleave_0"), val = bool(false)]; tensor concat_7 = concat(axis = concat_7_axis_0, interleave = concat_7_interleave_0, values = (expand_dims_10, concat_7_values1_0, var_238, concat_7_values3_0))[name = string("concat_7")]; tensor model_model_kv_cache_0_internal_tensor_assign_2_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_6, begin_mask = model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0, end = concat_7, end_mask = model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_2_stride_0, update = var_193, x = coreml_update_state_8)[name = string("model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_1_write_state")]; tensor coreml_update_state_9 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_1")]; tensor var_253_begin_0 = const()[name = string("op_253_begin_0"), val = tensor([4, 0, 0, 0])]; tensor var_253_end_0 = const()[name = string("op_253_end_0"), val = tensor([5, 8, 1024, 128])]; tensor var_253_end_mask_0 = const()[name = string("op_253_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_253_cast_fp16 = slice_by_index(begin = var_253_begin_0, end = var_253_end_0, end_mask = var_253_end_mask_0, x = coreml_update_state_9)[name = string("op_253_cast_fp16")]; tensor K_layer_cache_1_axes_0 = const()[name = string("K_layer_cache_1_axes_0"), val = tensor([0])]; tensor K_layer_cache_1_cast_fp16 = squeeze(axes = K_layer_cache_1_axes_0, x = var_253_cast_fp16)[name = string("K_layer_cache_1_cast_fp16")]; tensor var_255_begin_0 = const()[name = string("op_255_begin_0"), val = tensor([36, 0, 0, 0])]; tensor var_255_end_0 = const()[name = string("op_255_end_0"), val = tensor([37, 8, 1024, 128])]; tensor var_255_end_mask_0 = const()[name = string("op_255_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_255_cast_fp16 = slice_by_index(begin = var_255_begin_0, end = var_255_end_0, end_mask = var_255_end_mask_0, x = coreml_update_state_9)[name = string("op_255_cast_fp16")]; tensor V_layer_cache_1_axes_0 = const()[name = string("V_layer_cache_1_axes_0"), val = tensor([0])]; tensor V_layer_cache_1_cast_fp16 = squeeze(axes = V_layer_cache_1_axes_0, x = var_255_cast_fp16)[name = string("V_layer_cache_1_cast_fp16")]; tensor x_11_axes_0 = const()[name = string("x_11_axes_0"), val = tensor([1])]; tensor x_11_cast_fp16 = expand_dims(axes = x_11_axes_0, x = K_layer_cache_1_cast_fp16)[name = string("x_11_cast_fp16")]; tensor var_264 = const()[name = string("op_264"), val = tensor([1, 4, 1, 1])]; tensor x_13_cast_fp16 = tile(reps = var_264, x = x_11_cast_fp16)[name = string("x_13_cast_fp16")]; tensor var_268 = const()[name = string("op_268"), val = tensor([1, -1, 1024, 128])]; tensor key_states_3_cast_fp16 = reshape(shape = var_268, x = x_13_cast_fp16)[name = string("key_states_3_cast_fp16")]; tensor x_17_axes_0 = const()[name = string("x_17_axes_0"), val = tensor([1])]; tensor x_17_cast_fp16 = expand_dims(axes = x_17_axes_0, x = V_layer_cache_1_cast_fp16)[name = string("x_17_cast_fp16")]; tensor var_271 = const()[name = string("op_271"), val = tensor([1, 4, 1, 1])]; tensor x_19_cast_fp16 = tile(reps = var_271, x = x_17_cast_fp16)[name = string("x_19_cast_fp16")]; tensor var_275 = const()[name = string("op_275"), val = tensor([1, -1, 1024, 128])]; tensor value_states_3_cast_fp16 = reshape(shape = var_275, x = x_19_cast_fp16)[name = string("value_states_3_cast_fp16")]; bool var_278_transpose_x_1 = const()[name = string("op_278_transpose_x_1"), val = bool(false)]; bool var_278_transpose_y_1 = const()[name = string("op_278_transpose_y_1"), val = bool(true)]; tensor var_278_cast_fp16 = matmul(transpose_x = var_278_transpose_x_1, transpose_y = var_278_transpose_y_1, x = rotated_1_cast_fp16, y = key_states_3_cast_fp16)[name = string("op_278_cast_fp16")]; fp16 var_279_to_fp16 = const()[name = string("op_279_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_1_cast_fp16 = mul(x = var_278_cast_fp16, y = var_279_to_fp16)[name = string("attn_weights_1_cast_fp16")]; tensor x_21_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = causal_mask)[name = string("x_21_cast_fp16")]; tensor reduce_max_0_axes_0 = const()[name = string("reduce_max_0_axes_0"), val = tensor([-1])]; bool reduce_max_0_keep_dims_0 = const()[name = string("reduce_max_0_keep_dims_0"), val = bool(true)]; tensor reduce_max_0_cast_fp16 = reduce_max(axes = reduce_max_0_axes_0, keep_dims = reduce_max_0_keep_dims_0, x = x_21_cast_fp16)[name = string("reduce_max_0_cast_fp16")]; tensor x_23_cast_fp16 = sub(x = x_21_cast_fp16, y = reduce_max_0_cast_fp16)[name = string("x_23_cast_fp16")]; tensor exp_x_1_cast_fp16 = exp(x = x_23_cast_fp16)[name = string("exp_x_1_cast_fp16")]; tensor var_290_axes_0 = const()[name = string("op_290_axes_0"), val = tensor([-1])]; bool var_290_keep_dims_0 = const()[name = string("op_290_keep_dims_0"), val = bool(true)]; tensor var_290_cast_fp16 = reduce_sum(axes = var_290_axes_0, keep_dims = var_290_keep_dims_0, x = exp_x_1_cast_fp16)[name = string("op_290_cast_fp16")]; tensor attn_weights_3_cast_fp16 = real_div(x = exp_x_1_cast_fp16, y = var_290_cast_fp16)[name = string("attn_weights_3_cast_fp16")]; bool attn_output_1_transpose_x_0 = const()[name = string("attn_output_1_transpose_x_0"), val = bool(false)]; bool attn_output_1_transpose_y_0 = const()[name = string("attn_output_1_transpose_y_0"), val = bool(false)]; tensor attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_0, transpose_y = attn_output_1_transpose_y_0, x = attn_weights_3_cast_fp16, y = value_states_3_cast_fp16)[name = string("attn_output_1_cast_fp16")]; tensor var_293_perm_0 = const()[name = string("op_293_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_295 = const()[name = string("op_295"), val = tensor([1, 1, 4096])]; tensor var_293_cast_fp16 = transpose(perm = var_293_perm_0, x = attn_output_1_cast_fp16)[name = string("transpose_14")]; tensor input_5_cast_fp16 = reshape(shape = var_295, x = var_293_cast_fp16)[name = string("input_5_cast_fp16")]; tensor model_model_layers_4_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(673590528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(686173504))))[name = string("model_model_layers_4_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(686239104)))]; tensor linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_4_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_5_cast_fp16)[name = string("linear_0_cast_fp16")]; tensor hidden_states_5_cast_fp16 = add(x = hidden_states, y = linear_0_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; tensor mean_3_axes_0 = const()[name = string("mean_3_axes_0"), val = tensor([-1])]; bool mean_3_keep_dims_0 = const()[name = string("mean_3_keep_dims_0"), val = bool(true)]; tensor mean_3_cast_fp16 = reduce_mean(axes = mean_3_axes_0, keep_dims = mean_3_keep_dims_0, x = hidden_states_5_cast_fp16)[name = string("mean_3_cast_fp16")]; tensor input_7_cast_fp16 = sub(x = hidden_states_5_cast_fp16, y = mean_3_cast_fp16)[name = string("input_7_cast_fp16")]; tensor var_306_axes_0 = const()[name = string("op_306_axes_0"), val = tensor([-1])]; tensor model_model_layers_4_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_4_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(686247360)))]; tensor var_306_cast_fp16 = layer_norm(axes = var_306_axes_0, epsilon = var_33_to_fp16, gamma = model_model_layers_4_post_attention_layernorm_weight_to_fp16, x = input_7_cast_fp16)[name = string("op_306_cast_fp16")]; tensor var_313 = const()[name = string("op_313"), val = tensor([0, 2, 1])]; tensor input_9_axes_0 = const()[name = string("input_9_axes_0"), val = tensor([2])]; tensor var_314 = transpose(perm = var_313, x = var_306_cast_fp16)[name = string("transpose_13")]; tensor input_9 = expand_dims(axes = input_9_axes_0, x = var_314)[name = string("input_9")]; string input_11_pad_type_0 = const()[name = string("input_11_pad_type_0"), val = string("valid")]; tensor input_11_strides_0 = const()[name = string("input_11_strides_0"), val = tensor([1, 1])]; tensor input_11_pad_0 = const()[name = string("input_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_11_dilations_0 = const()[name = string("input_11_dilations_0"), val = tensor([1, 1])]; int32 input_11_groups_0 = const()[name = string("input_11_groups_0"), val = int32(1)]; tensor input_11 = conv(dilations = input_11_dilations_0, groups = input_11_groups_0, pad = input_11_pad_0, pad_type = input_11_pad_type_0, strides = input_11_strides_0, weight = model_model_layers_4_mlp_gate_proj_weight_palettized, x = input_9)[name = string("input_11")]; string up_states_1_pad_type_0 = const()[name = string("up_states_1_pad_type_0"), val = string("valid")]; tensor up_states_1_strides_0 = const()[name = string("up_states_1_strides_0"), val = tensor([1, 1])]; tensor up_states_1_pad_0 = const()[name = string("up_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_1_dilations_0 = const()[name = string("up_states_1_dilations_0"), val = tensor([1, 1])]; int32 up_states_1_groups_0 = const()[name = string("up_states_1_groups_0"), val = int32(1)]; tensor up_states_1 = conv(dilations = up_states_1_dilations_0, groups = up_states_1_groups_0, pad = up_states_1_pad_0, pad_type = up_states_1_pad_type_0, strides = up_states_1_strides_0, weight = model_model_layers_4_mlp_up_proj_weight_palettized, x = input_9)[name = string("up_states_1")]; tensor gate_states_1 = silu(x = input_11)[name = string("gate_states_1")]; tensor input_13 = mul(x = gate_states_1, y = up_states_1)[name = string("input_13")]; string hidden_states_7_pad_type_0 = const()[name = string("hidden_states_7_pad_type_0"), val = string("valid")]; tensor hidden_states_7_strides_0 = const()[name = string("hidden_states_7_strides_0"), val = tensor([1, 1])]; tensor hidden_states_7_pad_0 = const()[name = string("hidden_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_7_dilations_0 = const()[name = string("hidden_states_7_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_7_groups_0 = const()[name = string("hidden_states_7_groups_0"), val = int32(1)]; tensor hidden_states_7 = conv(dilations = hidden_states_7_dilations_0, groups = hidden_states_7_groups_0, pad = hidden_states_7_pad_0, pad_type = hidden_states_7_pad_type_0, strides = hidden_states_7_strides_0, weight = model_model_layers_4_mlp_down_proj_weight_palettized, x = input_13)[name = string("hidden_states_7")]; tensor var_336_axes_0 = const()[name = string("op_336_axes_0"), val = tensor([2])]; tensor var_336 = squeeze(axes = var_336_axes_0, x = hidden_states_7)[name = string("op_336")]; tensor var_337 = const()[name = string("op_337"), val = tensor([0, 2, 1])]; tensor var_338 = transpose(perm = var_337, x = var_336)[name = string("transpose_12")]; tensor hidden_states_9_cast_fp16 = add(x = hidden_states_5_cast_fp16, y = var_338)[name = string("hidden_states_9_cast_fp16")]; tensor mean_5_axes_0 = const()[name = string("mean_5_axes_0"), val = tensor([-1])]; bool mean_5_keep_dims_0 = const()[name = string("mean_5_keep_dims_0"), val = bool(true)]; tensor mean_5_cast_fp16 = reduce_mean(axes = mean_5_axes_0, keep_dims = mean_5_keep_dims_0, x = hidden_states_9_cast_fp16)[name = string("mean_5_cast_fp16")]; tensor input_15_cast_fp16 = sub(x = hidden_states_9_cast_fp16, y = mean_5_cast_fp16)[name = string("input_15_cast_fp16")]; tensor var_346_axes_0 = const()[name = string("op_346_axes_0"), val = tensor([-1])]; tensor model_model_layers_5_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_5_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(686255616)))]; tensor var_346_cast_fp16 = layer_norm(axes = var_346_axes_0, epsilon = var_33_to_fp16, gamma = model_model_layers_5_input_layernorm_weight_to_fp16, x = input_15_cast_fp16)[name = string("op_346_cast_fp16")]; tensor var_349 = const()[name = string("op_349"), val = tensor([0, 2, 1])]; tensor var_351_axes_0 = const()[name = string("op_351_axes_0"), val = tensor([2])]; tensor var_350 = transpose(perm = var_349, x = var_346_cast_fp16)[name = string("transpose_11")]; tensor var_351 = expand_dims(axes = var_351_axes_0, x = var_350)[name = string("op_351")]; string var_358_pad_type_0 = const()[name = string("op_358_pad_type_0"), val = string("valid")]; tensor var_358_strides_0 = const()[name = string("op_358_strides_0"), val = tensor([1, 1])]; tensor var_358_pad_0 = const()[name = string("op_358_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_358_dilations_0 = const()[name = string("op_358_dilations_0"), val = tensor([1, 1])]; int32 var_358_groups_0 = const()[name = string("op_358_groups_0"), val = int32(1)]; tensor var_358 = conv(dilations = var_358_dilations_0, groups = var_358_groups_0, pad = var_358_pad_0, pad_type = var_358_pad_type_0, strides = var_358_strides_0, weight = model_model_layers_5_self_attn_q_proj_weight_palettized, x = var_351)[name = string("op_358")]; tensor var_359 = const()[name = string("op_359"), val = tensor([1, 32, 1, 128])]; tensor var_360 = reshape(shape = var_359, x = var_358)[name = string("op_360")]; string var_367_pad_type_0 = const()[name = string("op_367_pad_type_0"), val = string("valid")]; tensor var_367_strides_0 = const()[name = string("op_367_strides_0"), val = tensor([1, 1])]; tensor var_367_pad_0 = const()[name = string("op_367_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_367_dilations_0 = const()[name = string("op_367_dilations_0"), val = tensor([1, 1])]; int32 var_367_groups_0 = const()[name = string("op_367_groups_0"), val = int32(1)]; tensor var_367 = conv(dilations = var_367_dilations_0, groups = var_367_groups_0, pad = var_367_pad_0, pad_type = var_367_pad_type_0, strides = var_367_strides_0, weight = model_model_layers_5_self_attn_k_proj_weight_palettized, x = var_351)[name = string("op_367")]; tensor var_368 = const()[name = string("op_368"), val = tensor([1, 8, 1, 128])]; tensor var_369 = reshape(shape = var_368, x = var_367)[name = string("op_369")]; string var_376_pad_type_0 = const()[name = string("op_376_pad_type_0"), val = string("valid")]; tensor var_376_strides_0 = const()[name = string("op_376_strides_0"), val = tensor([1, 1])]; tensor var_376_pad_0 = const()[name = string("op_376_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_376_dilations_0 = const()[name = string("op_376_dilations_0"), val = tensor([1, 1])]; int32 var_376_groups_0 = const()[name = string("op_376_groups_0"), val = int32(1)]; tensor var_376 = conv(dilations = var_376_dilations_0, groups = var_376_groups_0, pad = var_376_pad_0, pad_type = var_376_pad_type_0, strides = var_376_strides_0, weight = model_model_layers_5_self_attn_v_proj_weight_palettized, x = var_351)[name = string("op_376")]; tensor var_377 = const()[name = string("op_377"), val = tensor([1, 8, 1, 128])]; tensor var_378 = reshape(shape = var_377, x = var_376)[name = string("op_378")]; tensor x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor([1, 32, 1, 64])]; tensor x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_5 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = var_360)[name = string("x1_5")]; tensor x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor([1, 32, 1, 128])]; tensor x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_5 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = var_360)[name = string("x2_5")]; tensor var_392_cast_fp16 = mul(x = x1_5, y = cos_3_cast_fp16)[name = string("op_392_cast_fp16")]; tensor var_393_cast_fp16 = mul(x = x2_5, y = sin_3_cast_fp16)[name = string("op_393_cast_fp16")]; tensor var_394_cast_fp16 = sub(x = var_392_cast_fp16, y = var_393_cast_fp16)[name = string("op_394_cast_fp16")]; tensor var_395_cast_fp16 = mul(x = x2_5, y = cos_3_cast_fp16)[name = string("op_395_cast_fp16")]; tensor var_396_cast_fp16 = mul(x = x1_5, y = sin_3_cast_fp16)[name = string("op_396_cast_fp16")]; tensor var_397_cast_fp16 = add(x = var_395_cast_fp16, y = var_396_cast_fp16)[name = string("op_397_cast_fp16")]; bool rotated_5_interleave_0 = const()[name = string("rotated_5_interleave_0"), val = bool(false)]; tensor rotated_5_cast_fp16 = concat(axis = var_38, interleave = rotated_5_interleave_0, values = (var_394_cast_fp16, var_397_cast_fp16))[name = string("rotated_5_cast_fp16")]; tensor x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_7 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = var_369)[name = string("x1_7")]; tensor x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_7 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = var_369)[name = string("x2_7")]; tensor var_413_cast_fp16 = mul(x = x1_7, y = cos_3_cast_fp16)[name = string("op_413_cast_fp16")]; tensor var_414_cast_fp16 = mul(x = x2_7, y = sin_3_cast_fp16)[name = string("op_414_cast_fp16")]; tensor var_415_cast_fp16 = sub(x = var_413_cast_fp16, y = var_414_cast_fp16)[name = string("op_415_cast_fp16")]; tensor var_416_cast_fp16 = mul(x = x2_7, y = cos_3_cast_fp16)[name = string("op_416_cast_fp16")]; tensor var_417_cast_fp16 = mul(x = x1_7, y = sin_3_cast_fp16)[name = string("op_417_cast_fp16")]; tensor var_418_cast_fp16 = add(x = var_416_cast_fp16, y = var_417_cast_fp16)[name = string("op_418_cast_fp16")]; bool rotated_7_interleave_0 = const()[name = string("rotated_7_interleave_0"), val = bool(false)]; tensor rotated_7_cast_fp16 = concat(axis = var_38, interleave = rotated_7_interleave_0, values = (var_415_cast_fp16, var_418_cast_fp16))[name = string("rotated_7_cast_fp16")]; tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([5])]; tensor expand_dims_13 = const()[name = string("expand_dims_13"), val = tensor([0])]; tensor expand_dims_15 = const()[name = string("expand_dims_15"), val = tensor([0])]; tensor expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor([6])]; int32 concat_10_axis_0 = const()[name = string("concat_10_axis_0"), val = int32(0)]; bool concat_10_interleave_0 = const()[name = string("concat_10_interleave_0"), val = bool(false)]; tensor concat_10 = concat(axis = concat_10_axis_0, interleave = concat_10_interleave_0, values = (expand_dims_12, expand_dims_13, current_pos, expand_dims_15))[name = string("concat_10")]; tensor concat_11_values1_0 = const()[name = string("concat_11_values1_0"), val = tensor([0])]; tensor concat_11_values3_0 = const()[name = string("concat_11_values3_0"), val = tensor([0])]; int32 concat_11_axis_0 = const()[name = string("concat_11_axis_0"), val = int32(0)]; bool concat_11_interleave_0 = const()[name = string("concat_11_interleave_0"), val = bool(false)]; tensor concat_11 = concat(axis = concat_11_axis_0, interleave = concat_11_interleave_0, values = (expand_dims_16, concat_11_values1_0, var_238, concat_11_values3_0))[name = string("concat_11")]; tensor model_model_kv_cache_0_internal_tensor_assign_3_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_10, begin_mask = model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0, end = concat_11, end_mask = model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_3_stride_0, update = rotated_7_cast_fp16, x = coreml_update_state_9)[name = string("model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_2_write_state")]; tensor coreml_update_state_10 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_2")]; tensor expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor([37])]; tensor expand_dims_19 = const()[name = string("expand_dims_19"), val = tensor([0])]; tensor expand_dims_21 = const()[name = string("expand_dims_21"), val = tensor([0])]; tensor expand_dims_22 = const()[name = string("expand_dims_22"), val = tensor([38])]; int32 concat_14_axis_0 = const()[name = string("concat_14_axis_0"), val = int32(0)]; bool concat_14_interleave_0 = const()[name = string("concat_14_interleave_0"), val = bool(false)]; tensor concat_14 = concat(axis = concat_14_axis_0, interleave = concat_14_interleave_0, values = (expand_dims_18, expand_dims_19, current_pos, expand_dims_21))[name = string("concat_14")]; tensor concat_15_values1_0 = const()[name = string("concat_15_values1_0"), val = tensor([0])]; tensor concat_15_values3_0 = const()[name = string("concat_15_values3_0"), val = tensor([0])]; int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)]; bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)]; tensor concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (expand_dims_22, concat_15_values1_0, var_238, concat_15_values3_0))[name = string("concat_15")]; tensor model_model_kv_cache_0_internal_tensor_assign_4_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_14, begin_mask = model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0, end = concat_15, end_mask = model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_4_stride_0, update = var_378, x = coreml_update_state_10)[name = string("model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_3_write_state")]; tensor coreml_update_state_11 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_3")]; tensor var_438_begin_0 = const()[name = string("op_438_begin_0"), val = tensor([5, 0, 0, 0])]; tensor var_438_end_0 = const()[name = string("op_438_end_0"), val = tensor([6, 8, 1024, 128])]; tensor var_438_end_mask_0 = const()[name = string("op_438_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_438_cast_fp16 = slice_by_index(begin = var_438_begin_0, end = var_438_end_0, end_mask = var_438_end_mask_0, x = coreml_update_state_11)[name = string("op_438_cast_fp16")]; tensor K_layer_cache_3_axes_0 = const()[name = string("K_layer_cache_3_axes_0"), val = tensor([0])]; tensor K_layer_cache_3_cast_fp16 = squeeze(axes = K_layer_cache_3_axes_0, x = var_438_cast_fp16)[name = string("K_layer_cache_3_cast_fp16")]; tensor var_440_begin_0 = const()[name = string("op_440_begin_0"), val = tensor([37, 0, 0, 0])]; tensor var_440_end_0 = const()[name = string("op_440_end_0"), val = tensor([38, 8, 1024, 128])]; tensor var_440_end_mask_0 = const()[name = string("op_440_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_440_cast_fp16 = slice_by_index(begin = var_440_begin_0, end = var_440_end_0, end_mask = var_440_end_mask_0, x = coreml_update_state_11)[name = string("op_440_cast_fp16")]; tensor V_layer_cache_3_axes_0 = const()[name = string("V_layer_cache_3_axes_0"), val = tensor([0])]; tensor V_layer_cache_3_cast_fp16 = squeeze(axes = V_layer_cache_3_axes_0, x = var_440_cast_fp16)[name = string("V_layer_cache_3_cast_fp16")]; tensor x_39_axes_0 = const()[name = string("x_39_axes_0"), val = tensor([1])]; tensor x_39_cast_fp16 = expand_dims(axes = x_39_axes_0, x = K_layer_cache_3_cast_fp16)[name = string("x_39_cast_fp16")]; tensor var_449 = const()[name = string("op_449"), val = tensor([1, 4, 1, 1])]; tensor x_41_cast_fp16 = tile(reps = var_449, x = x_39_cast_fp16)[name = string("x_41_cast_fp16")]; tensor var_453 = const()[name = string("op_453"), val = tensor([1, -1, 1024, 128])]; tensor key_states_7_cast_fp16 = reshape(shape = var_453, x = x_41_cast_fp16)[name = string("key_states_7_cast_fp16")]; tensor x_45_axes_0 = const()[name = string("x_45_axes_0"), val = tensor([1])]; tensor x_45_cast_fp16 = expand_dims(axes = x_45_axes_0, x = V_layer_cache_3_cast_fp16)[name = string("x_45_cast_fp16")]; tensor var_456 = const()[name = string("op_456"), val = tensor([1, 4, 1, 1])]; tensor x_47_cast_fp16 = tile(reps = var_456, x = x_45_cast_fp16)[name = string("x_47_cast_fp16")]; tensor var_460 = const()[name = string("op_460"), val = tensor([1, -1, 1024, 128])]; tensor value_states_7_cast_fp16 = reshape(shape = var_460, x = x_47_cast_fp16)[name = string("value_states_7_cast_fp16")]; bool var_463_transpose_x_1 = const()[name = string("op_463_transpose_x_1"), val = bool(false)]; bool var_463_transpose_y_1 = const()[name = string("op_463_transpose_y_1"), val = bool(true)]; tensor var_463_cast_fp16 = matmul(transpose_x = var_463_transpose_x_1, transpose_y = var_463_transpose_y_1, x = rotated_5_cast_fp16, y = key_states_7_cast_fp16)[name = string("op_463_cast_fp16")]; fp16 var_464_to_fp16 = const()[name = string("op_464_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_5_cast_fp16 = mul(x = var_463_cast_fp16, y = var_464_to_fp16)[name = string("attn_weights_5_cast_fp16")]; tensor x_49_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = causal_mask)[name = string("x_49_cast_fp16")]; tensor reduce_max_1_axes_0 = const()[name = string("reduce_max_1_axes_0"), val = tensor([-1])]; bool reduce_max_1_keep_dims_0 = const()[name = string("reduce_max_1_keep_dims_0"), val = bool(true)]; tensor reduce_max_1_cast_fp16 = reduce_max(axes = reduce_max_1_axes_0, keep_dims = reduce_max_1_keep_dims_0, x = x_49_cast_fp16)[name = string("reduce_max_1_cast_fp16")]; tensor x_51_cast_fp16 = sub(x = x_49_cast_fp16, y = reduce_max_1_cast_fp16)[name = string("x_51_cast_fp16")]; tensor exp_x_3_cast_fp16 = exp(x = x_51_cast_fp16)[name = string("exp_x_3_cast_fp16")]; tensor var_475_axes_0 = const()[name = string("op_475_axes_0"), val = tensor([-1])]; bool var_475_keep_dims_0 = const()[name = string("op_475_keep_dims_0"), val = bool(true)]; tensor var_475_cast_fp16 = reduce_sum(axes = var_475_axes_0, keep_dims = var_475_keep_dims_0, x = exp_x_3_cast_fp16)[name = string("op_475_cast_fp16")]; tensor attn_weights_7_cast_fp16 = real_div(x = exp_x_3_cast_fp16, y = var_475_cast_fp16)[name = string("attn_weights_7_cast_fp16")]; bool attn_output_7_transpose_x_0 = const()[name = string("attn_output_7_transpose_x_0"), val = bool(false)]; bool attn_output_7_transpose_y_0 = const()[name = string("attn_output_7_transpose_y_0"), val = bool(false)]; tensor attn_output_7_cast_fp16 = matmul(transpose_x = attn_output_7_transpose_x_0, transpose_y = attn_output_7_transpose_y_0, x = attn_weights_7_cast_fp16, y = value_states_7_cast_fp16)[name = string("attn_output_7_cast_fp16")]; tensor var_478_perm_0 = const()[name = string("op_478_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_480 = const()[name = string("op_480"), val = tensor([1, 1, 4096])]; tensor var_478_cast_fp16 = transpose(perm = var_478_perm_0, x = attn_output_7_cast_fp16)[name = string("transpose_10")]; tensor input_19_cast_fp16 = reshape(shape = var_480, x = var_478_cast_fp16)[name = string("input_19_cast_fp16")]; tensor model_model_layers_5_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(686263872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698846848))))[name = string("model_model_layers_5_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_1_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_5_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_19_cast_fp16)[name = string("linear_1_cast_fp16")]; tensor hidden_states_13_cast_fp16 = add(x = hidden_states_9_cast_fp16, y = linear_1_cast_fp16)[name = string("hidden_states_13_cast_fp16")]; tensor mean_7_axes_0 = const()[name = string("mean_7_axes_0"), val = tensor([-1])]; bool mean_7_keep_dims_0 = const()[name = string("mean_7_keep_dims_0"), val = bool(true)]; tensor mean_7_cast_fp16 = reduce_mean(axes = mean_7_axes_0, keep_dims = mean_7_keep_dims_0, x = hidden_states_13_cast_fp16)[name = string("mean_7_cast_fp16")]; tensor input_21_cast_fp16 = sub(x = hidden_states_13_cast_fp16, y = mean_7_cast_fp16)[name = string("input_21_cast_fp16")]; tensor var_491_axes_0 = const()[name = string("op_491_axes_0"), val = tensor([-1])]; tensor model_model_layers_5_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_5_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698912448)))]; tensor var_491_cast_fp16 = layer_norm(axes = var_491_axes_0, epsilon = var_33_to_fp16, gamma = model_model_layers_5_post_attention_layernorm_weight_to_fp16, x = input_21_cast_fp16)[name = string("op_491_cast_fp16")]; tensor var_498 = const()[name = string("op_498"), val = tensor([0, 2, 1])]; tensor input_23_axes_0 = const()[name = string("input_23_axes_0"), val = tensor([2])]; tensor var_499 = transpose(perm = var_498, x = var_491_cast_fp16)[name = string("transpose_9")]; tensor input_23 = expand_dims(axes = input_23_axes_0, x = var_499)[name = string("input_23")]; string input_25_pad_type_0 = const()[name = string("input_25_pad_type_0"), val = string("valid")]; tensor input_25_strides_0 = const()[name = string("input_25_strides_0"), val = tensor([1, 1])]; tensor input_25_pad_0 = const()[name = string("input_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_25_dilations_0 = const()[name = string("input_25_dilations_0"), val = tensor([1, 1])]; int32 input_25_groups_0 = const()[name = string("input_25_groups_0"), val = int32(1)]; tensor input_25 = conv(dilations = input_25_dilations_0, groups = input_25_groups_0, pad = input_25_pad_0, pad_type = input_25_pad_type_0, strides = input_25_strides_0, weight = model_model_layers_5_mlp_gate_proj_weight_palettized, x = input_23)[name = string("input_25")]; string up_states_3_pad_type_0 = const()[name = string("up_states_3_pad_type_0"), val = string("valid")]; tensor up_states_3_strides_0 = const()[name = string("up_states_3_strides_0"), val = tensor([1, 1])]; tensor up_states_3_pad_0 = const()[name = string("up_states_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_3_dilations_0 = const()[name = string("up_states_3_dilations_0"), val = tensor([1, 1])]; int32 up_states_3_groups_0 = const()[name = string("up_states_3_groups_0"), val = int32(1)]; tensor up_states_3 = conv(dilations = up_states_3_dilations_0, groups = up_states_3_groups_0, pad = up_states_3_pad_0, pad_type = up_states_3_pad_type_0, strides = up_states_3_strides_0, weight = model_model_layers_5_mlp_up_proj_weight_palettized, x = input_23)[name = string("up_states_3")]; tensor gate_states_3 = silu(x = input_25)[name = string("gate_states_3")]; tensor input_27 = mul(x = gate_states_3, y = up_states_3)[name = string("input_27")]; string hidden_states_15_pad_type_0 = const()[name = string("hidden_states_15_pad_type_0"), val = string("valid")]; tensor hidden_states_15_strides_0 = const()[name = string("hidden_states_15_strides_0"), val = tensor([1, 1])]; tensor hidden_states_15_pad_0 = const()[name = string("hidden_states_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_15_dilations_0 = const()[name = string("hidden_states_15_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_15_groups_0 = const()[name = string("hidden_states_15_groups_0"), val = int32(1)]; tensor hidden_states_15 = conv(dilations = hidden_states_15_dilations_0, groups = hidden_states_15_groups_0, pad = hidden_states_15_pad_0, pad_type = hidden_states_15_pad_type_0, strides = hidden_states_15_strides_0, weight = model_model_layers_5_mlp_down_proj_weight_palettized, x = input_27)[name = string("hidden_states_15")]; tensor var_521_axes_0 = const()[name = string("op_521_axes_0"), val = tensor([2])]; tensor var_521 = squeeze(axes = var_521_axes_0, x = hidden_states_15)[name = string("op_521")]; tensor var_522 = const()[name = string("op_522"), val = tensor([0, 2, 1])]; tensor var_523 = transpose(perm = var_522, x = var_521)[name = string("transpose_8")]; tensor hidden_states_17_cast_fp16 = add(x = hidden_states_13_cast_fp16, y = var_523)[name = string("hidden_states_17_cast_fp16")]; tensor mean_9_axes_0 = const()[name = string("mean_9_axes_0"), val = tensor([-1])]; bool mean_9_keep_dims_0 = const()[name = string("mean_9_keep_dims_0"), val = bool(true)]; tensor mean_9_cast_fp16 = reduce_mean(axes = mean_9_axes_0, keep_dims = mean_9_keep_dims_0, x = hidden_states_17_cast_fp16)[name = string("mean_9_cast_fp16")]; tensor input_29_cast_fp16 = sub(x = hidden_states_17_cast_fp16, y = mean_9_cast_fp16)[name = string("input_29_cast_fp16")]; tensor var_531_axes_0 = const()[name = string("op_531_axes_0"), val = tensor([-1])]; tensor model_model_layers_6_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_6_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698920704)))]; tensor var_531_cast_fp16 = layer_norm(axes = var_531_axes_0, epsilon = var_33_to_fp16, gamma = model_model_layers_6_input_layernorm_weight_to_fp16, x = input_29_cast_fp16)[name = string("op_531_cast_fp16")]; tensor var_534 = const()[name = string("op_534"), val = tensor([0, 2, 1])]; tensor var_536_axes_0 = const()[name = string("op_536_axes_0"), val = tensor([2])]; tensor var_535 = transpose(perm = var_534, x = var_531_cast_fp16)[name = string("transpose_7")]; tensor var_536 = expand_dims(axes = var_536_axes_0, x = var_535)[name = string("op_536")]; string var_543_pad_type_0 = const()[name = string("op_543_pad_type_0"), val = string("valid")]; tensor var_543_strides_0 = const()[name = string("op_543_strides_0"), val = tensor([1, 1])]; tensor var_543_pad_0 = const()[name = string("op_543_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_543_dilations_0 = const()[name = string("op_543_dilations_0"), val = tensor([1, 1])]; int32 var_543_groups_0 = const()[name = string("op_543_groups_0"), val = int32(1)]; tensor var_543 = conv(dilations = var_543_dilations_0, groups = var_543_groups_0, pad = var_543_pad_0, pad_type = var_543_pad_type_0, strides = var_543_strides_0, weight = model_model_layers_6_self_attn_q_proj_weight_palettized, x = var_536)[name = string("op_543")]; tensor var_544 = const()[name = string("op_544"), val = tensor([1, 32, 1, 128])]; tensor var_545 = reshape(shape = var_544, x = var_543)[name = string("op_545")]; string var_552_pad_type_0 = const()[name = string("op_552_pad_type_0"), val = string("valid")]; tensor var_552_strides_0 = const()[name = string("op_552_strides_0"), val = tensor([1, 1])]; tensor var_552_pad_0 = const()[name = string("op_552_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_552_dilations_0 = const()[name = string("op_552_dilations_0"), val = tensor([1, 1])]; int32 var_552_groups_0 = const()[name = string("op_552_groups_0"), val = int32(1)]; tensor var_552 = conv(dilations = var_552_dilations_0, groups = var_552_groups_0, pad = var_552_pad_0, pad_type = var_552_pad_type_0, strides = var_552_strides_0, weight = model_model_layers_6_self_attn_k_proj_weight_palettized, x = var_536)[name = string("op_552")]; tensor var_553 = const()[name = string("op_553"), val = tensor([1, 8, 1, 128])]; tensor var_554 = reshape(shape = var_553, x = var_552)[name = string("op_554")]; string var_561_pad_type_0 = const()[name = string("op_561_pad_type_0"), val = string("valid")]; tensor var_561_strides_0 = const()[name = string("op_561_strides_0"), val = tensor([1, 1])]; tensor var_561_pad_0 = const()[name = string("op_561_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_561_dilations_0 = const()[name = string("op_561_dilations_0"), val = tensor([1, 1])]; int32 var_561_groups_0 = const()[name = string("op_561_groups_0"), val = int32(1)]; tensor var_561 = conv(dilations = var_561_dilations_0, groups = var_561_groups_0, pad = var_561_pad_0, pad_type = var_561_pad_type_0, strides = var_561_strides_0, weight = model_model_layers_6_self_attn_v_proj_weight_palettized, x = var_536)[name = string("op_561")]; tensor var_562 = const()[name = string("op_562"), val = tensor([1, 8, 1, 128])]; tensor var_563 = reshape(shape = var_562, x = var_561)[name = string("op_563")]; tensor x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor([1, 32, 1, 64])]; tensor x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_9 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = var_545)[name = string("x1_9")]; tensor x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor([1, 32, 1, 128])]; tensor x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_9 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = var_545)[name = string("x2_9")]; tensor var_577_cast_fp16 = mul(x = x1_9, y = cos_3_cast_fp16)[name = string("op_577_cast_fp16")]; tensor var_578_cast_fp16 = mul(x = x2_9, y = sin_3_cast_fp16)[name = string("op_578_cast_fp16")]; tensor var_579_cast_fp16 = sub(x = var_577_cast_fp16, y = var_578_cast_fp16)[name = string("op_579_cast_fp16")]; tensor var_580_cast_fp16 = mul(x = x2_9, y = cos_3_cast_fp16)[name = string("op_580_cast_fp16")]; tensor var_581_cast_fp16 = mul(x = x1_9, y = sin_3_cast_fp16)[name = string("op_581_cast_fp16")]; tensor var_582_cast_fp16 = add(x = var_580_cast_fp16, y = var_581_cast_fp16)[name = string("op_582_cast_fp16")]; bool rotated_9_interleave_0 = const()[name = string("rotated_9_interleave_0"), val = bool(false)]; tensor rotated_9_cast_fp16 = concat(axis = var_38, interleave = rotated_9_interleave_0, values = (var_579_cast_fp16, var_582_cast_fp16))[name = string("rotated_9_cast_fp16")]; tensor x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_11 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = var_554)[name = string("x1_11")]; tensor x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_11 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = var_554)[name = string("x2_11")]; tensor var_598_cast_fp16 = mul(x = x1_11, y = cos_3_cast_fp16)[name = string("op_598_cast_fp16")]; tensor var_599_cast_fp16 = mul(x = x2_11, y = sin_3_cast_fp16)[name = string("op_599_cast_fp16")]; tensor var_600_cast_fp16 = sub(x = var_598_cast_fp16, y = var_599_cast_fp16)[name = string("op_600_cast_fp16")]; tensor var_601_cast_fp16 = mul(x = x2_11, y = cos_3_cast_fp16)[name = string("op_601_cast_fp16")]; tensor var_602_cast_fp16 = mul(x = x1_11, y = sin_3_cast_fp16)[name = string("op_602_cast_fp16")]; tensor var_603_cast_fp16 = add(x = var_601_cast_fp16, y = var_602_cast_fp16)[name = string("op_603_cast_fp16")]; bool rotated_11_interleave_0 = const()[name = string("rotated_11_interleave_0"), val = bool(false)]; tensor rotated_11_cast_fp16 = concat(axis = var_38, interleave = rotated_11_interleave_0, values = (var_600_cast_fp16, var_603_cast_fp16))[name = string("rotated_11_cast_fp16")]; tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([6])]; tensor expand_dims_25 = const()[name = string("expand_dims_25"), val = tensor([0])]; tensor expand_dims_27 = const()[name = string("expand_dims_27"), val = tensor([0])]; tensor expand_dims_28 = const()[name = string("expand_dims_28"), val = tensor([7])]; int32 concat_18_axis_0 = const()[name = string("concat_18_axis_0"), val = int32(0)]; bool concat_18_interleave_0 = const()[name = string("concat_18_interleave_0"), val = bool(false)]; tensor concat_18 = concat(axis = concat_18_axis_0, interleave = concat_18_interleave_0, values = (expand_dims_24, expand_dims_25, current_pos, expand_dims_27))[name = string("concat_18")]; tensor concat_19_values1_0 = const()[name = string("concat_19_values1_0"), val = tensor([0])]; tensor concat_19_values3_0 = const()[name = string("concat_19_values3_0"), val = tensor([0])]; int32 concat_19_axis_0 = const()[name = string("concat_19_axis_0"), val = int32(0)]; bool concat_19_interleave_0 = const()[name = string("concat_19_interleave_0"), val = bool(false)]; tensor concat_19 = concat(axis = concat_19_axis_0, interleave = concat_19_interleave_0, values = (expand_dims_28, concat_19_values1_0, var_238, concat_19_values3_0))[name = string("concat_19")]; tensor model_model_kv_cache_0_internal_tensor_assign_5_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_18, begin_mask = model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0, end = concat_19, end_mask = model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_5_stride_0, update = rotated_11_cast_fp16, x = coreml_update_state_11)[name = string("model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_4_write_state")]; tensor coreml_update_state_12 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_4")]; tensor expand_dims_30 = const()[name = string("expand_dims_30"), val = tensor([38])]; tensor expand_dims_31 = const()[name = string("expand_dims_31"), val = tensor([0])]; tensor expand_dims_33 = const()[name = string("expand_dims_33"), val = tensor([0])]; tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([39])]; int32 concat_22_axis_0 = const()[name = string("concat_22_axis_0"), val = int32(0)]; bool concat_22_interleave_0 = const()[name = string("concat_22_interleave_0"), val = bool(false)]; tensor concat_22 = concat(axis = concat_22_axis_0, interleave = concat_22_interleave_0, values = (expand_dims_30, expand_dims_31, current_pos, expand_dims_33))[name = string("concat_22")]; tensor concat_23_values1_0 = const()[name = string("concat_23_values1_0"), val = tensor([0])]; tensor concat_23_values3_0 = const()[name = string("concat_23_values3_0"), val = tensor([0])]; int32 concat_23_axis_0 = const()[name = string("concat_23_axis_0"), val = int32(0)]; bool concat_23_interleave_0 = const()[name = string("concat_23_interleave_0"), val = bool(false)]; tensor concat_23 = concat(axis = concat_23_axis_0, interleave = concat_23_interleave_0, values = (expand_dims_34, concat_23_values1_0, var_238, concat_23_values3_0))[name = string("concat_23")]; tensor model_model_kv_cache_0_internal_tensor_assign_6_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_22, begin_mask = model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0, end = concat_23, end_mask = model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_6_stride_0, update = var_563, x = coreml_update_state_12)[name = string("model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_5_write_state")]; tensor coreml_update_state_13 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_5")]; tensor var_623_begin_0 = const()[name = string("op_623_begin_0"), val = tensor([6, 0, 0, 0])]; tensor var_623_end_0 = const()[name = string("op_623_end_0"), val = tensor([7, 8, 1024, 128])]; tensor var_623_end_mask_0 = const()[name = string("op_623_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_623_cast_fp16 = slice_by_index(begin = var_623_begin_0, end = var_623_end_0, end_mask = var_623_end_mask_0, x = coreml_update_state_13)[name = string("op_623_cast_fp16")]; tensor K_layer_cache_5_axes_0 = const()[name = string("K_layer_cache_5_axes_0"), val = tensor([0])]; tensor K_layer_cache_5_cast_fp16 = squeeze(axes = K_layer_cache_5_axes_0, x = var_623_cast_fp16)[name = string("K_layer_cache_5_cast_fp16")]; tensor var_625_begin_0 = const()[name = string("op_625_begin_0"), val = tensor([38, 0, 0, 0])]; tensor var_625_end_0 = const()[name = string("op_625_end_0"), val = tensor([39, 8, 1024, 128])]; tensor var_625_end_mask_0 = const()[name = string("op_625_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_625_cast_fp16 = slice_by_index(begin = var_625_begin_0, end = var_625_end_0, end_mask = var_625_end_mask_0, x = coreml_update_state_13)[name = string("op_625_cast_fp16")]; tensor V_layer_cache_5_axes_0 = const()[name = string("V_layer_cache_5_axes_0"), val = tensor([0])]; tensor V_layer_cache_5_cast_fp16 = squeeze(axes = V_layer_cache_5_axes_0, x = var_625_cast_fp16)[name = string("V_layer_cache_5_cast_fp16")]; tensor x_67_axes_0 = const()[name = string("x_67_axes_0"), val = tensor([1])]; tensor x_67_cast_fp16 = expand_dims(axes = x_67_axes_0, x = K_layer_cache_5_cast_fp16)[name = string("x_67_cast_fp16")]; tensor var_634 = const()[name = string("op_634"), val = tensor([1, 4, 1, 1])]; tensor x_69_cast_fp16 = tile(reps = var_634, x = x_67_cast_fp16)[name = string("x_69_cast_fp16")]; tensor var_638 = const()[name = string("op_638"), val = tensor([1, -1, 1024, 128])]; tensor key_states_11_cast_fp16 = reshape(shape = var_638, x = x_69_cast_fp16)[name = string("key_states_11_cast_fp16")]; tensor x_73_axes_0 = const()[name = string("x_73_axes_0"), val = tensor([1])]; tensor x_73_cast_fp16 = expand_dims(axes = x_73_axes_0, x = V_layer_cache_5_cast_fp16)[name = string("x_73_cast_fp16")]; tensor var_641 = const()[name = string("op_641"), val = tensor([1, 4, 1, 1])]; tensor x_75_cast_fp16 = tile(reps = var_641, x = x_73_cast_fp16)[name = string("x_75_cast_fp16")]; tensor var_645 = const()[name = string("op_645"), val = tensor([1, -1, 1024, 128])]; tensor value_states_11_cast_fp16 = reshape(shape = var_645, x = x_75_cast_fp16)[name = string("value_states_11_cast_fp16")]; bool var_648_transpose_x_1 = const()[name = string("op_648_transpose_x_1"), val = bool(false)]; bool var_648_transpose_y_1 = const()[name = string("op_648_transpose_y_1"), val = bool(true)]; tensor var_648_cast_fp16 = matmul(transpose_x = var_648_transpose_x_1, transpose_y = var_648_transpose_y_1, x = rotated_9_cast_fp16, y = key_states_11_cast_fp16)[name = string("op_648_cast_fp16")]; fp16 var_649_to_fp16 = const()[name = string("op_649_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_9_cast_fp16 = mul(x = var_648_cast_fp16, y = var_649_to_fp16)[name = string("attn_weights_9_cast_fp16")]; tensor x_77_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = causal_mask)[name = string("x_77_cast_fp16")]; tensor reduce_max_2_axes_0 = const()[name = string("reduce_max_2_axes_0"), val = tensor([-1])]; bool reduce_max_2_keep_dims_0 = const()[name = string("reduce_max_2_keep_dims_0"), val = bool(true)]; tensor reduce_max_2_cast_fp16 = reduce_max(axes = reduce_max_2_axes_0, keep_dims = reduce_max_2_keep_dims_0, x = x_77_cast_fp16)[name = string("reduce_max_2_cast_fp16")]; tensor x_79_cast_fp16 = sub(x = x_77_cast_fp16, y = reduce_max_2_cast_fp16)[name = string("x_79_cast_fp16")]; tensor exp_x_5_cast_fp16 = exp(x = x_79_cast_fp16)[name = string("exp_x_5_cast_fp16")]; tensor var_660_axes_0 = const()[name = string("op_660_axes_0"), val = tensor([-1])]; bool var_660_keep_dims_0 = const()[name = string("op_660_keep_dims_0"), val = bool(true)]; tensor var_660_cast_fp16 = reduce_sum(axes = var_660_axes_0, keep_dims = var_660_keep_dims_0, x = exp_x_5_cast_fp16)[name = string("op_660_cast_fp16")]; tensor attn_weights_11_cast_fp16 = real_div(x = exp_x_5_cast_fp16, y = var_660_cast_fp16)[name = string("attn_weights_11_cast_fp16")]; bool attn_output_13_transpose_x_0 = const()[name = string("attn_output_13_transpose_x_0"), val = bool(false)]; bool attn_output_13_transpose_y_0 = const()[name = string("attn_output_13_transpose_y_0"), val = bool(false)]; tensor attn_output_13_cast_fp16 = matmul(transpose_x = attn_output_13_transpose_x_0, transpose_y = attn_output_13_transpose_y_0, x = attn_weights_11_cast_fp16, y = value_states_11_cast_fp16)[name = string("attn_output_13_cast_fp16")]; tensor var_663_perm_0 = const()[name = string("op_663_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_665 = const()[name = string("op_665"), val = tensor([1, 1, 4096])]; tensor var_663_cast_fp16 = transpose(perm = var_663_perm_0, x = attn_output_13_cast_fp16)[name = string("transpose_6")]; tensor input_33_cast_fp16 = reshape(shape = var_665, x = var_663_cast_fp16)[name = string("input_33_cast_fp16")]; tensor model_model_layers_6_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698928960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(711511936))))[name = string("model_model_layers_6_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_2_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_6_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_33_cast_fp16)[name = string("linear_2_cast_fp16")]; tensor hidden_states_21_cast_fp16 = add(x = hidden_states_17_cast_fp16, y = linear_2_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; tensor mean_11_axes_0 = const()[name = string("mean_11_axes_0"), val = tensor([-1])]; bool mean_11_keep_dims_0 = const()[name = string("mean_11_keep_dims_0"), val = bool(true)]; tensor mean_11_cast_fp16 = reduce_mean(axes = mean_11_axes_0, keep_dims = mean_11_keep_dims_0, x = hidden_states_21_cast_fp16)[name = string("mean_11_cast_fp16")]; tensor input_35_cast_fp16 = sub(x = hidden_states_21_cast_fp16, y = mean_11_cast_fp16)[name = string("input_35_cast_fp16")]; tensor var_676_axes_0 = const()[name = string("op_676_axes_0"), val = tensor([-1])]; tensor model_model_layers_6_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_6_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(711577536)))]; tensor var_676_cast_fp16 = layer_norm(axes = var_676_axes_0, epsilon = var_33_to_fp16, gamma = model_model_layers_6_post_attention_layernorm_weight_to_fp16, x = input_35_cast_fp16)[name = string("op_676_cast_fp16")]; tensor var_683 = const()[name = string("op_683"), val = tensor([0, 2, 1])]; tensor input_37_axes_0 = const()[name = string("input_37_axes_0"), val = tensor([2])]; tensor var_684 = transpose(perm = var_683, x = var_676_cast_fp16)[name = string("transpose_5")]; tensor input_37 = expand_dims(axes = input_37_axes_0, x = var_684)[name = string("input_37")]; string input_39_pad_type_0 = const()[name = string("input_39_pad_type_0"), val = string("valid")]; tensor input_39_strides_0 = const()[name = string("input_39_strides_0"), val = tensor([1, 1])]; tensor input_39_pad_0 = const()[name = string("input_39_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_39_dilations_0 = const()[name = string("input_39_dilations_0"), val = tensor([1, 1])]; int32 input_39_groups_0 = const()[name = string("input_39_groups_0"), val = int32(1)]; tensor input_39 = conv(dilations = input_39_dilations_0, groups = input_39_groups_0, pad = input_39_pad_0, pad_type = input_39_pad_type_0, strides = input_39_strides_0, weight = model_model_layers_6_mlp_gate_proj_weight_palettized, x = input_37)[name = string("input_39")]; string up_states_5_pad_type_0 = const()[name = string("up_states_5_pad_type_0"), val = string("valid")]; tensor up_states_5_strides_0 = const()[name = string("up_states_5_strides_0"), val = tensor([1, 1])]; tensor up_states_5_pad_0 = const()[name = string("up_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_5_dilations_0 = const()[name = string("up_states_5_dilations_0"), val = tensor([1, 1])]; int32 up_states_5_groups_0 = const()[name = string("up_states_5_groups_0"), val = int32(1)]; tensor up_states_5 = conv(dilations = up_states_5_dilations_0, groups = up_states_5_groups_0, pad = up_states_5_pad_0, pad_type = up_states_5_pad_type_0, strides = up_states_5_strides_0, weight = model_model_layers_6_mlp_up_proj_weight_palettized, x = input_37)[name = string("up_states_5")]; tensor gate_states_5 = silu(x = input_39)[name = string("gate_states_5")]; tensor input_41 = mul(x = gate_states_5, y = up_states_5)[name = string("input_41")]; string hidden_states_23_pad_type_0 = const()[name = string("hidden_states_23_pad_type_0"), val = string("valid")]; tensor hidden_states_23_strides_0 = const()[name = string("hidden_states_23_strides_0"), val = tensor([1, 1])]; tensor hidden_states_23_pad_0 = const()[name = string("hidden_states_23_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_23_dilations_0 = const()[name = string("hidden_states_23_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_23_groups_0 = const()[name = string("hidden_states_23_groups_0"), val = int32(1)]; tensor hidden_states_23 = conv(dilations = hidden_states_23_dilations_0, groups = hidden_states_23_groups_0, pad = hidden_states_23_pad_0, pad_type = hidden_states_23_pad_type_0, strides = hidden_states_23_strides_0, weight = model_model_layers_6_mlp_down_proj_weight_palettized, x = input_41)[name = string("hidden_states_23")]; tensor var_706_axes_0 = const()[name = string("op_706_axes_0"), val = tensor([2])]; tensor var_706 = squeeze(axes = var_706_axes_0, x = hidden_states_23)[name = string("op_706")]; tensor var_707 = const()[name = string("op_707"), val = tensor([0, 2, 1])]; tensor var_708 = transpose(perm = var_707, x = var_706)[name = string("transpose_4")]; tensor hidden_states_25_cast_fp16 = add(x = hidden_states_21_cast_fp16, y = var_708)[name = string("hidden_states_25_cast_fp16")]; tensor mean_13_axes_0 = const()[name = string("mean_13_axes_0"), val = tensor([-1])]; bool mean_13_keep_dims_0 = const()[name = string("mean_13_keep_dims_0"), val = bool(true)]; tensor mean_13_cast_fp16 = reduce_mean(axes = mean_13_axes_0, keep_dims = mean_13_keep_dims_0, x = hidden_states_25_cast_fp16)[name = string("mean_13_cast_fp16")]; tensor input_43_cast_fp16 = sub(x = hidden_states_25_cast_fp16, y = mean_13_cast_fp16)[name = string("input_43_cast_fp16")]; tensor var_716_axes_0 = const()[name = string("op_716_axes_0"), val = tensor([-1])]; tensor model_model_layers_7_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_7_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(711585792)))]; tensor var_716_cast_fp16 = layer_norm(axes = var_716_axes_0, epsilon = var_33_to_fp16, gamma = model_model_layers_7_input_layernorm_weight_to_fp16, x = input_43_cast_fp16)[name = string("op_716_cast_fp16")]; tensor var_719 = const()[name = string("op_719"), val = tensor([0, 2, 1])]; tensor var_721_axes_0 = const()[name = string("op_721_axes_0"), val = tensor([2])]; tensor var_720 = transpose(perm = var_719, x = var_716_cast_fp16)[name = string("transpose_3")]; tensor var_721 = expand_dims(axes = var_721_axes_0, x = var_720)[name = string("op_721")]; string var_728_pad_type_0 = const()[name = string("op_728_pad_type_0"), val = string("valid")]; tensor var_728_strides_0 = const()[name = string("op_728_strides_0"), val = tensor([1, 1])]; tensor var_728_pad_0 = const()[name = string("op_728_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_728_dilations_0 = const()[name = string("op_728_dilations_0"), val = tensor([1, 1])]; int32 var_728_groups_0 = const()[name = string("op_728_groups_0"), val = int32(1)]; tensor var_728 = conv(dilations = var_728_dilations_0, groups = var_728_groups_0, pad = var_728_pad_0, pad_type = var_728_pad_type_0, strides = var_728_strides_0, weight = model_model_layers_7_self_attn_q_proj_weight_palettized, x = var_721)[name = string("op_728")]; tensor var_729 = const()[name = string("op_729"), val = tensor([1, 32, 1, 128])]; tensor var_730 = reshape(shape = var_729, x = var_728)[name = string("op_730")]; string var_737_pad_type_0 = const()[name = string("op_737_pad_type_0"), val = string("valid")]; tensor var_737_strides_0 = const()[name = string("op_737_strides_0"), val = tensor([1, 1])]; tensor var_737_pad_0 = const()[name = string("op_737_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_737_dilations_0 = const()[name = string("op_737_dilations_0"), val = tensor([1, 1])]; int32 var_737_groups_0 = const()[name = string("op_737_groups_0"), val = int32(1)]; tensor var_737 = conv(dilations = var_737_dilations_0, groups = var_737_groups_0, pad = var_737_pad_0, pad_type = var_737_pad_type_0, strides = var_737_strides_0, weight = model_model_layers_7_self_attn_k_proj_weight_palettized, x = var_721)[name = string("op_737")]; tensor var_738 = const()[name = string("op_738"), val = tensor([1, 8, 1, 128])]; tensor var_739 = reshape(shape = var_738, x = var_737)[name = string("op_739")]; string var_746_pad_type_0 = const()[name = string("op_746_pad_type_0"), val = string("valid")]; tensor var_746_strides_0 = const()[name = string("op_746_strides_0"), val = tensor([1, 1])]; tensor var_746_pad_0 = const()[name = string("op_746_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_746_dilations_0 = const()[name = string("op_746_dilations_0"), val = tensor([1, 1])]; int32 var_746_groups_0 = const()[name = string("op_746_groups_0"), val = int32(1)]; tensor var_746 = conv(dilations = var_746_dilations_0, groups = var_746_groups_0, pad = var_746_pad_0, pad_type = var_746_pad_type_0, strides = var_746_strides_0, weight = model_model_layers_7_self_attn_v_proj_weight_palettized, x = var_721)[name = string("op_746")]; tensor var_747 = const()[name = string("op_747"), val = tensor([1, 8, 1, 128])]; tensor var_748 = reshape(shape = var_747, x = var_746)[name = string("op_748")]; tensor x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor([1, 32, 1, 64])]; tensor x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_13 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = var_730)[name = string("x1_13")]; tensor x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor([1, 32, 1, 128])]; tensor x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_13 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = var_730)[name = string("x2_13")]; tensor var_762_cast_fp16 = mul(x = x1_13, y = cos_3_cast_fp16)[name = string("op_762_cast_fp16")]; tensor var_763_cast_fp16 = mul(x = x2_13, y = sin_3_cast_fp16)[name = string("op_763_cast_fp16")]; tensor var_764_cast_fp16 = sub(x = var_762_cast_fp16, y = var_763_cast_fp16)[name = string("op_764_cast_fp16")]; tensor var_765_cast_fp16 = mul(x = x2_13, y = cos_3_cast_fp16)[name = string("op_765_cast_fp16")]; tensor var_766_cast_fp16 = mul(x = x1_13, y = sin_3_cast_fp16)[name = string("op_766_cast_fp16")]; tensor var_767_cast_fp16 = add(x = var_765_cast_fp16, y = var_766_cast_fp16)[name = string("op_767_cast_fp16")]; bool rotated_13_interleave_0 = const()[name = string("rotated_13_interleave_0"), val = bool(false)]; tensor rotated_13_cast_fp16 = concat(axis = var_38, interleave = rotated_13_interleave_0, values = (var_764_cast_fp16, var_767_cast_fp16))[name = string("rotated_13_cast_fp16")]; tensor x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_end_0 = const()[name = string("x1_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = var_739)[name = string("x1")]; tensor x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_end_0 = const()[name = string("x2_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = var_739)[name = string("x2")]; tensor var_783_cast_fp16 = mul(x = x1, y = cos_3_cast_fp16)[name = string("op_783_cast_fp16")]; tensor var_784_cast_fp16 = mul(x = x2, y = sin_3_cast_fp16)[name = string("op_784_cast_fp16")]; tensor var_785_cast_fp16 = sub(x = var_783_cast_fp16, y = var_784_cast_fp16)[name = string("op_785_cast_fp16")]; tensor var_786_cast_fp16 = mul(x = x2, y = cos_3_cast_fp16)[name = string("op_786_cast_fp16")]; tensor var_787_cast_fp16 = mul(x = x1, y = sin_3_cast_fp16)[name = string("op_787_cast_fp16")]; tensor var_788_cast_fp16 = add(x = var_786_cast_fp16, y = var_787_cast_fp16)[name = string("op_788_cast_fp16")]; bool rotated_interleave_0 = const()[name = string("rotated_interleave_0"), val = bool(false)]; tensor rotated_cast_fp16 = concat(axis = var_38, interleave = rotated_interleave_0, values = (var_785_cast_fp16, var_788_cast_fp16))[name = string("rotated_cast_fp16")]; tensor expand_dims_36 = const()[name = string("expand_dims_36"), val = tensor([7])]; tensor expand_dims_37 = const()[name = string("expand_dims_37"), val = tensor([0])]; tensor expand_dims_39 = const()[name = string("expand_dims_39"), val = tensor([0])]; tensor expand_dims_40 = const()[name = string("expand_dims_40"), val = tensor([8])]; int32 concat_26_axis_0 = const()[name = string("concat_26_axis_0"), val = int32(0)]; bool concat_26_interleave_0 = const()[name = string("concat_26_interleave_0"), val = bool(false)]; tensor concat_26 = concat(axis = concat_26_axis_0, interleave = concat_26_interleave_0, values = (expand_dims_36, expand_dims_37, current_pos, expand_dims_39))[name = string("concat_26")]; tensor concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor([0])]; tensor concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor([0])]; int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)]; bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)]; tensor concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (expand_dims_40, concat_27_values1_0, var_238, concat_27_values3_0))[name = string("concat_27")]; tensor model_model_kv_cache_0_internal_tensor_assign_7_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_26, begin_mask = model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0, end = concat_27, end_mask = model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_7_stride_0, update = rotated_cast_fp16, x = coreml_update_state_13)[name = string("model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_6_write_state")]; tensor coreml_update_state_14 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_6")]; tensor expand_dims_42 = const()[name = string("expand_dims_42"), val = tensor([39])]; tensor expand_dims_43 = const()[name = string("expand_dims_43"), val = tensor([0])]; tensor expand_dims_45 = const()[name = string("expand_dims_45"), val = tensor([0])]; tensor expand_dims_46 = const()[name = string("expand_dims_46"), val = tensor([40])]; int32 concat_30_axis_0 = const()[name = string("concat_30_axis_0"), val = int32(0)]; bool concat_30_interleave_0 = const()[name = string("concat_30_interleave_0"), val = bool(false)]; tensor concat_30 = concat(axis = concat_30_axis_0, interleave = concat_30_interleave_0, values = (expand_dims_42, expand_dims_43, current_pos, expand_dims_45))[name = string("concat_30")]; tensor concat_31_values1_0 = const()[name = string("concat_31_values1_0"), val = tensor([0])]; tensor concat_31_values3_0 = const()[name = string("concat_31_values3_0"), val = tensor([0])]; int32 concat_31_axis_0 = const()[name = string("concat_31_axis_0"), val = int32(0)]; bool concat_31_interleave_0 = const()[name = string("concat_31_interleave_0"), val = bool(false)]; tensor concat_31 = concat(axis = concat_31_axis_0, interleave = concat_31_interleave_0, values = (expand_dims_46, concat_31_values1_0, var_238, concat_31_values3_0))[name = string("concat_31")]; tensor model_model_kv_cache_0_internal_tensor_assign_8_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_30, begin_mask = model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0, end = concat_31, end_mask = model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_8_stride_0, update = var_748, x = coreml_update_state_14)[name = string("model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_7_write_state")]; tensor coreml_update_state_15 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_7")]; tensor var_808_begin_0 = const()[name = string("op_808_begin_0"), val = tensor([7, 0, 0, 0])]; tensor var_808_end_0 = const()[name = string("op_808_end_0"), val = tensor([8, 8, 1024, 128])]; tensor var_808_end_mask_0 = const()[name = string("op_808_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_808_cast_fp16 = slice_by_index(begin = var_808_begin_0, end = var_808_end_0, end_mask = var_808_end_mask_0, x = coreml_update_state_15)[name = string("op_808_cast_fp16")]; tensor K_layer_cache_axes_0 = const()[name = string("K_layer_cache_axes_0"), val = tensor([0])]; tensor K_layer_cache_cast_fp16 = squeeze(axes = K_layer_cache_axes_0, x = var_808_cast_fp16)[name = string("K_layer_cache_cast_fp16")]; tensor var_810_begin_0 = const()[name = string("op_810_begin_0"), val = tensor([39, 0, 0, 0])]; tensor var_810_end_0 = const()[name = string("op_810_end_0"), val = tensor([40, 8, 1024, 128])]; tensor var_810_end_mask_0 = const()[name = string("op_810_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_810_cast_fp16 = slice_by_index(begin = var_810_begin_0, end = var_810_end_0, end_mask = var_810_end_mask_0, x = coreml_update_state_15)[name = string("op_810_cast_fp16")]; tensor V_layer_cache_axes_0 = const()[name = string("V_layer_cache_axes_0"), val = tensor([0])]; tensor V_layer_cache_cast_fp16 = squeeze(axes = V_layer_cache_axes_0, x = var_810_cast_fp16)[name = string("V_layer_cache_cast_fp16")]; tensor x_95_axes_0 = const()[name = string("x_95_axes_0"), val = tensor([1])]; tensor x_95_cast_fp16 = expand_dims(axes = x_95_axes_0, x = K_layer_cache_cast_fp16)[name = string("x_95_cast_fp16")]; tensor var_819 = const()[name = string("op_819"), val = tensor([1, 4, 1, 1])]; tensor x_97_cast_fp16 = tile(reps = var_819, x = x_95_cast_fp16)[name = string("x_97_cast_fp16")]; tensor var_823 = const()[name = string("op_823"), val = tensor([1, -1, 1024, 128])]; tensor key_states_cast_fp16 = reshape(shape = var_823, x = x_97_cast_fp16)[name = string("key_states_cast_fp16")]; tensor x_101_axes_0 = const()[name = string("x_101_axes_0"), val = tensor([1])]; tensor x_101_cast_fp16 = expand_dims(axes = x_101_axes_0, x = V_layer_cache_cast_fp16)[name = string("x_101_cast_fp16")]; tensor var_826 = const()[name = string("op_826"), val = tensor([1, 4, 1, 1])]; tensor x_103_cast_fp16 = tile(reps = var_826, x = x_101_cast_fp16)[name = string("x_103_cast_fp16")]; tensor var_830 = const()[name = string("op_830"), val = tensor([1, -1, 1024, 128])]; tensor value_states_cast_fp16 = reshape(shape = var_830, x = x_103_cast_fp16)[name = string("value_states_cast_fp16")]; bool var_833_transpose_x_1 = const()[name = string("op_833_transpose_x_1"), val = bool(false)]; bool var_833_transpose_y_1 = const()[name = string("op_833_transpose_y_1"), val = bool(true)]; tensor var_833_cast_fp16 = matmul(transpose_x = var_833_transpose_x_1, transpose_y = var_833_transpose_y_1, x = rotated_13_cast_fp16, y = key_states_cast_fp16)[name = string("op_833_cast_fp16")]; fp16 var_834_to_fp16 = const()[name = string("op_834_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_13_cast_fp16 = mul(x = var_833_cast_fp16, y = var_834_to_fp16)[name = string("attn_weights_13_cast_fp16")]; tensor x_105_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = causal_mask)[name = string("x_105_cast_fp16")]; tensor reduce_max_3_axes_0 = const()[name = string("reduce_max_3_axes_0"), val = tensor([-1])]; bool reduce_max_3_keep_dims_0 = const()[name = string("reduce_max_3_keep_dims_0"), val = bool(true)]; tensor reduce_max_3_cast_fp16 = reduce_max(axes = reduce_max_3_axes_0, keep_dims = reduce_max_3_keep_dims_0, x = x_105_cast_fp16)[name = string("reduce_max_3_cast_fp16")]; tensor x_107_cast_fp16 = sub(x = x_105_cast_fp16, y = reduce_max_3_cast_fp16)[name = string("x_107_cast_fp16")]; tensor exp_x_cast_fp16 = exp(x = x_107_cast_fp16)[name = string("exp_x_cast_fp16")]; tensor var_845_axes_0 = const()[name = string("op_845_axes_0"), val = tensor([-1])]; bool var_845_keep_dims_0 = const()[name = string("op_845_keep_dims_0"), val = bool(true)]; tensor var_845_cast_fp16 = reduce_sum(axes = var_845_axes_0, keep_dims = var_845_keep_dims_0, x = exp_x_cast_fp16)[name = string("op_845_cast_fp16")]; tensor attn_weights_cast_fp16 = real_div(x = exp_x_cast_fp16, y = var_845_cast_fp16)[name = string("attn_weights_cast_fp16")]; bool attn_output_19_transpose_x_0 = const()[name = string("attn_output_19_transpose_x_0"), val = bool(false)]; bool attn_output_19_transpose_y_0 = const()[name = string("attn_output_19_transpose_y_0"), val = bool(false)]; tensor attn_output_19_cast_fp16 = matmul(transpose_x = attn_output_19_transpose_x_0, transpose_y = attn_output_19_transpose_y_0, x = attn_weights_cast_fp16, y = value_states_cast_fp16)[name = string("attn_output_19_cast_fp16")]; tensor var_848_perm_0 = const()[name = string("op_848_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_850 = const()[name = string("op_850"), val = tensor([1, 1, 4096])]; tensor var_848_cast_fp16 = transpose(perm = var_848_perm_0, x = attn_output_19_cast_fp16)[name = string("transpose_2")]; tensor input_47_cast_fp16 = reshape(shape = var_850, x = var_848_cast_fp16)[name = string("input_47_cast_fp16")]; tensor model_model_layers_7_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(711594048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(724177024))))[name = string("model_model_layers_7_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_3_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_7_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_47_cast_fp16)[name = string("linear_3_cast_fp16")]; tensor hidden_states_29_cast_fp16 = add(x = hidden_states_25_cast_fp16, y = linear_3_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; tensor mean_axes_0 = const()[name = string("mean_axes_0"), val = tensor([-1])]; bool mean_keep_dims_0 = const()[name = string("mean_keep_dims_0"), val = bool(true)]; tensor mean_cast_fp16 = reduce_mean(axes = mean_axes_0, keep_dims = mean_keep_dims_0, x = hidden_states_29_cast_fp16)[name = string("mean_cast_fp16")]; tensor input_49_cast_fp16 = sub(x = hidden_states_29_cast_fp16, y = mean_cast_fp16)[name = string("input_49_cast_fp16")]; tensor var_861_axes_0 = const()[name = string("op_861_axes_0"), val = tensor([-1])]; tensor model_model_layers_7_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_7_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(724242624)))]; tensor var_861_cast_fp16 = layer_norm(axes = var_861_axes_0, epsilon = var_33_to_fp16, gamma = model_model_layers_7_post_attention_layernorm_weight_to_fp16, x = input_49_cast_fp16)[name = string("op_861_cast_fp16")]; tensor var_868 = const()[name = string("op_868"), val = tensor([0, 2, 1])]; tensor input_51_axes_0 = const()[name = string("input_51_axes_0"), val = tensor([2])]; tensor var_869 = transpose(perm = var_868, x = var_861_cast_fp16)[name = string("transpose_1")]; tensor input_51 = expand_dims(axes = input_51_axes_0, x = var_869)[name = string("input_51")]; string input_53_pad_type_0 = const()[name = string("input_53_pad_type_0"), val = string("valid")]; tensor input_53_strides_0 = const()[name = string("input_53_strides_0"), val = tensor([1, 1])]; tensor input_53_pad_0 = const()[name = string("input_53_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_53_dilations_0 = const()[name = string("input_53_dilations_0"), val = tensor([1, 1])]; int32 input_53_groups_0 = const()[name = string("input_53_groups_0"), val = int32(1)]; tensor input_53 = conv(dilations = input_53_dilations_0, groups = input_53_groups_0, pad = input_53_pad_0, pad_type = input_53_pad_type_0, strides = input_53_strides_0, weight = model_model_layers_7_mlp_gate_proj_weight_palettized, x = input_51)[name = string("input_53")]; string up_states_pad_type_0 = const()[name = string("up_states_pad_type_0"), val = string("valid")]; tensor up_states_strides_0 = const()[name = string("up_states_strides_0"), val = tensor([1, 1])]; tensor up_states_pad_0 = const()[name = string("up_states_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_dilations_0 = const()[name = string("up_states_dilations_0"), val = tensor([1, 1])]; int32 up_states_groups_0 = const()[name = string("up_states_groups_0"), val = int32(1)]; tensor up_states = conv(dilations = up_states_dilations_0, groups = up_states_groups_0, pad = up_states_pad_0, pad_type = up_states_pad_type_0, strides = up_states_strides_0, weight = model_model_layers_7_mlp_up_proj_weight_palettized, x = input_51)[name = string("up_states")]; tensor gate_states = silu(x = input_53)[name = string("gate_states")]; tensor input = mul(x = gate_states, y = up_states)[name = string("input")]; string hidden_states_pad_type_0 = const()[name = string("hidden_states_pad_type_0"), val = string("valid")]; tensor hidden_states_strides_0 = const()[name = string("hidden_states_strides_0"), val = tensor([1, 1])]; tensor hidden_states_pad_0 = const()[name = string("hidden_states_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_dilations_0 = const()[name = string("hidden_states_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_groups_0 = const()[name = string("hidden_states_groups_0"), val = int32(1)]; tensor hidden_states_1 = conv(dilations = hidden_states_dilations_0, groups = hidden_states_groups_0, pad = hidden_states_pad_0, pad_type = hidden_states_pad_type_0, strides = hidden_states_strides_0, weight = model_model_layers_7_mlp_down_proj_weight_palettized, x = input)[name = string("hidden_states")]; tensor var_891_axes_0 = const()[name = string("op_891_axes_0"), val = tensor([2])]; tensor var_891 = squeeze(axes = var_891_axes_0, x = hidden_states_1)[name = string("op_891")]; tensor var_892 = const()[name = string("op_892"), val = tensor([0, 2, 1])]; tensor var_893 = transpose(perm = var_892, x = var_891)[name = string("transpose_0")]; tensor output_hidden_states = add(x = hidden_states_29_cast_fp16, y = var_893)[name = string("op_894_cast_fp16")]; tensor position_ids_tmp = identity(x = position_ids)[name = string("position_ids_tmp")]; } -> (output_hidden_states); func prefill(tensor causal_mask, tensor current_pos, tensor hidden_states, state> model_model_kv_cache_0, tensor position_ids) { tensor model_model_layers_4_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12583040))))[name = string("model_model_layers_4_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_4_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12648640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15794432))))[name = string("model_model_layers_4_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_4_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15810880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18956672))))[name = string("model_model_layers_4_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_4_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18973120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63013376))))[name = string("model_model_layers_4_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_4_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63242816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107283072))))[name = string("model_model_layers_4_mlp_up_proj_weight_palettized")]; tensor model_model_layers_4_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107512512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151552768))))[name = string("model_model_layers_4_mlp_down_proj_weight_palettized")]; tensor model_model_layers_5_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151618368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(164201344))))[name = string("model_model_layers_5_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_5_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(164266944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167412736))))[name = string("model_model_layers_5_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_5_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167429184))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170574976))))[name = string("model_model_layers_5_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_5_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170591424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214631680))))[name = string("model_model_layers_5_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_5_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214861120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(258901376))))[name = string("model_model_layers_5_mlp_up_proj_weight_palettized")]; tensor model_model_layers_5_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(259130816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303171072))))[name = string("model_model_layers_5_mlp_down_proj_weight_palettized")]; tensor model_model_layers_6_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303236672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315819648))))[name = string("model_model_layers_6_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_6_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315885248))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319031040))))[name = string("model_model_layers_6_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_6_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319047488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322193280))))[name = string("model_model_layers_6_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_6_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322209728))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366249984))))[name = string("model_model_layers_6_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_6_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366479424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410519680))))[name = string("model_model_layers_6_mlp_up_proj_weight_palettized")]; tensor model_model_layers_6_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410749120))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(454789376))))[name = string("model_model_layers_6_mlp_down_proj_weight_palettized")]; tensor model_model_layers_7_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(454854976))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(467437952))))[name = string("model_model_layers_7_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_7_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(467503552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(470649344))))[name = string("model_model_layers_7_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_7_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(470665792))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(473811584))))[name = string("model_model_layers_7_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_7_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(473828032))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(517868288))))[name = string("model_model_layers_7_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_7_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(518097728))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562137984))))[name = string("model_model_layers_7_mlp_up_proj_weight_palettized")]; tensor model_model_layers_7_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562367424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(606407680))))[name = string("model_model_layers_7_mlp_down_proj_weight_palettized")]; int32 var_33 = const()[name = string("op_33"), val = int32(-1)]; int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; tensor greater_equal_0 = greater_equal(x = position_ids, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(131072)]; tensor add_0 = add(x = position_ids, y = slice_by_index_0)[name = string("add_0")]; tensor select_0 = select(a = position_ids, b = add_0, cond = greater_equal_0)[name = string("select_0")]; int32 var_150_axis_0 = const()[name = string("op_150_axis_0"), val = int32(1)]; int32 var_150_batch_dims_0 = const()[name = string("op_150_batch_dims_0"), val = int32(0)]; bool var_150_validate_indices_0 = const()[name = string("op_150_validate_indices_0"), val = bool(false)]; tensor var_44_to_fp16 = const()[name = string("op_44_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(640027776)))]; tensor var_150_cast_fp16 = gather(axis = var_150_axis_0, batch_dims = var_150_batch_dims_0, indices = select_0, validate_indices = var_150_validate_indices_0, x = var_44_to_fp16)[name = string("op_150_cast_fp16")]; tensor var_151 = const()[name = string("op_151"), val = tensor([1, 256, 1, 128])]; tensor cos_1_cast_fp16 = reshape(shape = var_151, x = var_150_cast_fp16)[name = string("cos_1_cast_fp16")]; int32 var_155_axis_0 = const()[name = string("op_155_axis_0"), val = int32(1)]; int32 var_155_batch_dims_0 = const()[name = string("op_155_batch_dims_0"), val = int32(0)]; bool var_155_validate_indices_0 = const()[name = string("op_155_validate_indices_0"), val = bool(false)]; tensor var_39_to_fp16 = const()[name = string("op_39_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(606473280)))]; tensor var_155_cast_fp16 = gather(axis = var_155_axis_0, batch_dims = var_155_batch_dims_0, indices = select_0, validate_indices = var_155_validate_indices_0, x = var_39_to_fp16)[name = string("op_155_cast_fp16")]; tensor var_156 = const()[name = string("op_156"), val = tensor([1, 256, 1, 128])]; tensor sin_1_cast_fp16 = reshape(shape = var_156, x = var_155_cast_fp16)[name = string("sin_1_cast_fp16")]; tensor mean_1_axes_0 = const()[name = string("mean_1_axes_0"), val = tensor([-1])]; bool mean_1_keep_dims_0 = const()[name = string("mean_1_keep_dims_0"), val = bool(true)]; tensor mean_1_cast_fp16 = reduce_mean(axes = mean_1_axes_0, keep_dims = mean_1_keep_dims_0, x = hidden_states)[name = string("mean_1_cast_fp16")]; tensor input_1_cast_fp16 = sub(x = hidden_states, y = mean_1_cast_fp16)[name = string("input_1_cast_fp16")]; tensor var_166_axes_0 = const()[name = string("op_166_axes_0"), val = tensor([-1])]; tensor model_model_layers_4_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_4_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(673582272)))]; fp16 var_35_to_fp16 = const()[name = string("op_35_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_166_cast_fp16 = layer_norm(axes = var_166_axes_0, epsilon = var_35_to_fp16, gamma = model_model_layers_4_input_layernorm_weight_to_fp16, x = input_1_cast_fp16)[name = string("op_166_cast_fp16")]; tensor var_170 = const()[name = string("op_170"), val = tensor([0, 2, 1])]; tensor var_172_axes_0 = const()[name = string("op_172_axes_0"), val = tensor([2])]; tensor var_171 = transpose(perm = var_170, x = var_166_cast_fp16)[name = string("transpose_29")]; tensor var_172 = expand_dims(axes = var_172_axes_0, x = var_171)[name = string("op_172")]; string query_states_1_pad_type_0 = const()[name = string("query_states_1_pad_type_0"), val = string("valid")]; tensor query_states_1_strides_0 = const()[name = string("query_states_1_strides_0"), val = tensor([1, 1])]; tensor query_states_1_pad_0 = const()[name = string("query_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_1_dilations_0 = const()[name = string("query_states_1_dilations_0"), val = tensor([1, 1])]; int32 query_states_1_groups_0 = const()[name = string("query_states_1_groups_0"), val = int32(1)]; tensor query_states_1 = conv(dilations = query_states_1_dilations_0, groups = query_states_1_groups_0, pad = query_states_1_pad_0, pad_type = query_states_1_pad_type_0, strides = query_states_1_strides_0, weight = model_model_layers_4_self_attn_q_proj_weight_palettized, x = var_172)[name = string("query_states_1")]; string key_states_1_pad_type_0 = const()[name = string("key_states_1_pad_type_0"), val = string("valid")]; tensor key_states_1_strides_0 = const()[name = string("key_states_1_strides_0"), val = tensor([1, 1])]; tensor key_states_1_pad_0 = const()[name = string("key_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_1_dilations_0 = const()[name = string("key_states_1_dilations_0"), val = tensor([1, 1])]; int32 key_states_1_groups_0 = const()[name = string("key_states_1_groups_0"), val = int32(1)]; tensor key_states_1 = conv(dilations = key_states_1_dilations_0, groups = key_states_1_groups_0, pad = key_states_1_pad_0, pad_type = key_states_1_pad_type_0, strides = key_states_1_strides_0, weight = model_model_layers_4_self_attn_k_proj_weight_palettized, x = var_172)[name = string("key_states_1")]; string value_states_1_pad_type_0 = const()[name = string("value_states_1_pad_type_0"), val = string("valid")]; tensor value_states_1_strides_0 = const()[name = string("value_states_1_strides_0"), val = tensor([1, 1])]; tensor value_states_1_pad_0 = const()[name = string("value_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_1_dilations_0 = const()[name = string("value_states_1_dilations_0"), val = tensor([1, 1])]; int32 value_states_1_groups_0 = const()[name = string("value_states_1_groups_0"), val = int32(1)]; tensor value_states_1 = conv(dilations = value_states_1_dilations_0, groups = value_states_1_groups_0, pad = value_states_1_pad_0, pad_type = value_states_1_pad_type_0, strides = value_states_1_strides_0, weight = model_model_layers_4_self_attn_v_proj_weight_palettized, x = var_172)[name = string("value_states_1")]; tensor var_192 = const()[name = string("op_192"), val = tensor([1, 32, 128, 256])]; tensor var_193 = reshape(shape = var_192, x = query_states_1)[name = string("op_193")]; tensor var_194 = const()[name = string("op_194"), val = tensor([0, 1, 3, 2])]; tensor var_196 = const()[name = string("op_196"), val = tensor([1, 8, 128, 256])]; tensor var_197 = reshape(shape = var_196, x = key_states_1)[name = string("op_197")]; tensor var_198 = const()[name = string("op_198"), val = tensor([0, 1, 3, 2])]; tensor var_200 = const()[name = string("op_200"), val = tensor([1, 8, 128, 256])]; tensor var_201 = reshape(shape = var_200, x = value_states_1)[name = string("op_201")]; tensor var_202 = const()[name = string("op_202"), val = tensor([0, 1, 3, 2])]; tensor var_204 = const()[name = string("op_204"), val = tensor([0, 2, 1, 3])]; tensor var_206 = const()[name = string("op_206"), val = tensor([0, 2, 1, 3])]; tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 32, 256, 64])]; tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_1 = transpose(perm = var_194, x = var_193)[name = string("transpose_28")]; tensor x1_1 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = x_1)[name = string("x1_1")]; tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 32, 256, 128])]; tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_1 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = x_1)[name = string("x2_1")]; tensor cos_7_begin_0 = const()[name = string("cos_7_begin_0"), val = tensor([0, 0, 0, 0])]; tensor cos_7_end_0 = const()[name = string("cos_7_end_0"), val = tensor([1, 1, 256, 64])]; tensor cos_7_end_mask_0 = const()[name = string("cos_7_end_mask_0"), val = tensor([true, true, true, false])]; tensor cos_5 = transpose(perm = var_204, x = cos_1_cast_fp16)[name = string("transpose_27")]; tensor cos_7 = slice_by_index(begin = cos_7_begin_0, end = cos_7_end_0, end_mask = cos_7_end_mask_0, x = cos_5)[name = string("cos_7")]; tensor sin_7_begin_0 = const()[name = string("sin_7_begin_0"), val = tensor([0, 0, 0, 0])]; tensor sin_7_end_0 = const()[name = string("sin_7_end_0"), val = tensor([1, 1, 256, 64])]; tensor sin_7_end_mask_0 = const()[name = string("sin_7_end_mask_0"), val = tensor([true, true, true, false])]; tensor sin_5 = transpose(perm = var_206, x = sin_1_cast_fp16)[name = string("transpose_26")]; tensor sin_7 = slice_by_index(begin = sin_7_begin_0, end = sin_7_end_0, end_mask = sin_7_end_mask_0, x = sin_5)[name = string("sin_7")]; tensor var_220 = mul(x = x1_1, y = cos_7)[name = string("op_220")]; tensor var_221 = mul(x = x2_1, y = sin_7)[name = string("op_221")]; tensor var_222 = sub(x = var_220, y = var_221)[name = string("op_222")]; tensor var_223 = mul(x = x2_1, y = cos_7)[name = string("op_223")]; tensor var_224 = mul(x = x1_1, y = sin_7)[name = string("op_224")]; tensor var_225 = add(x = var_223, y = var_224)[name = string("op_225")]; bool rotated_1_interleave_0 = const()[name = string("rotated_1_interleave_0"), val = bool(false)]; tensor rotated_1 = concat(axis = var_33, interleave = rotated_1_interleave_0, values = (var_222, var_225))[name = string("rotated_1")]; tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 8, 256, 64])]; tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_5 = transpose(perm = var_198, x = var_197)[name = string("transpose_25")]; tensor x1_3 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = x_5)[name = string("x1_3")]; tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 8, 256, 128])]; tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_3 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = x_5)[name = string("x2_3")]; tensor var_241 = mul(x = x1_3, y = cos_7)[name = string("op_241")]; tensor var_242 = mul(x = x2_3, y = sin_7)[name = string("op_242")]; tensor var_243 = sub(x = var_241, y = var_242)[name = string("op_243")]; tensor var_244 = mul(x = x2_3, y = cos_7)[name = string("op_244")]; tensor var_245 = mul(x = x1_3, y = sin_7)[name = string("op_245")]; tensor var_246 = add(x = var_244, y = var_245)[name = string("op_246")]; bool rotated_3_interleave_0 = const()[name = string("rotated_3_interleave_0"), val = bool(false)]; tensor rotated_3 = concat(axis = var_33, interleave = rotated_3_interleave_0, values = (var_243, var_246))[name = string("rotated_3")]; tensor seq_length_1 = const()[name = string("seq_length_1"), val = tensor([256])]; tensor var_255 = add(x = current_pos, y = seq_length_1)[name = string("op_255")]; tensor read_state_0 = read_state(input = model_model_kv_cache_0)[name = string("read_state_0")]; tensor expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor([4])]; tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor([0])]; tensor expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor([0])]; tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor([5])]; int32 concat_2_axis_0 = const()[name = string("concat_2_axis_0"), val = int32(0)]; bool concat_2_interleave_0 = const()[name = string("concat_2_interleave_0"), val = bool(false)]; tensor concat_2 = concat(axis = concat_2_axis_0, interleave = concat_2_interleave_0, values = (expand_dims_0, expand_dims_1, current_pos, expand_dims_3))[name = string("concat_2")]; tensor concat_3_values1_0 = const()[name = string("concat_3_values1_0"), val = tensor([0])]; tensor concat_3_values3_0 = const()[name = string("concat_3_values3_0"), val = tensor([0])]; int32 concat_3_axis_0 = const()[name = string("concat_3_axis_0"), val = int32(0)]; bool concat_3_interleave_0 = const()[name = string("concat_3_interleave_0"), val = bool(false)]; tensor concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (expand_dims_4, concat_3_values1_0, var_255, concat_3_values3_0))[name = string("concat_3")]; tensor model_model_kv_cache_0_internal_tensor_assign_1_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_1_stride_0, update = rotated_3, x = read_state_0)[name = string("model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_8_write_state")]; tensor coreml_update_state_8 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_8")]; tensor expand_dims_6 = const()[name = string("expand_dims_6"), val = tensor([36])]; tensor expand_dims_7 = const()[name = string("expand_dims_7"), val = tensor([0])]; tensor expand_dims_9 = const()[name = string("expand_dims_9"), val = tensor([0])]; tensor expand_dims_10 = const()[name = string("expand_dims_10"), val = tensor([37])]; int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)]; bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)]; tensor concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (expand_dims_6, expand_dims_7, current_pos, expand_dims_9))[name = string("concat_6")]; tensor concat_7_values1_0 = const()[name = string("concat_7_values1_0"), val = tensor([0])]; tensor concat_7_values3_0 = const()[name = string("concat_7_values3_0"), val = tensor([0])]; int32 concat_7_axis_0 = const()[name = string("concat_7_axis_0"), val = int32(0)]; bool concat_7_interleave_0 = const()[name = string("concat_7_interleave_0"), val = bool(false)]; tensor concat_7 = concat(axis = concat_7_axis_0, interleave = concat_7_interleave_0, values = (expand_dims_10, concat_7_values1_0, var_255, concat_7_values3_0))[name = string("concat_7")]; tensor model_model_kv_cache_0_internal_tensor_assign_2_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_3 = transpose(perm = var_202, x = var_201)[name = string("transpose_24")]; tensor model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_6, begin_mask = model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0, end = concat_7, end_mask = model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_2_stride_0, update = value_states_3, x = coreml_update_state_8)[name = string("model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_9_write_state")]; tensor coreml_update_state_9 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_9")]; tensor var_269_begin_0 = const()[name = string("op_269_begin_0"), val = tensor([4, 0, 0, 0])]; tensor var_269_end_0 = const()[name = string("op_269_end_0"), val = tensor([5, 8, 1024, 128])]; tensor var_269_end_mask_0 = const()[name = string("op_269_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_269_cast_fp16 = slice_by_index(begin = var_269_begin_0, end = var_269_end_0, end_mask = var_269_end_mask_0, x = coreml_update_state_9)[name = string("op_269_cast_fp16")]; tensor K_layer_cache_1_axes_0 = const()[name = string("K_layer_cache_1_axes_0"), val = tensor([0])]; tensor K_layer_cache_1_cast_fp16 = squeeze(axes = K_layer_cache_1_axes_0, x = var_269_cast_fp16)[name = string("K_layer_cache_1_cast_fp16")]; tensor var_271_begin_0 = const()[name = string("op_271_begin_0"), val = tensor([36, 0, 0, 0])]; tensor var_271_end_0 = const()[name = string("op_271_end_0"), val = tensor([37, 8, 1024, 128])]; tensor var_271_end_mask_0 = const()[name = string("op_271_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_271_cast_fp16 = slice_by_index(begin = var_271_begin_0, end = var_271_end_0, end_mask = var_271_end_mask_0, x = coreml_update_state_9)[name = string("op_271_cast_fp16")]; tensor V_layer_cache_1_axes_0 = const()[name = string("V_layer_cache_1_axes_0"), val = tensor([0])]; tensor V_layer_cache_1_cast_fp16 = squeeze(axes = V_layer_cache_1_axes_0, x = var_271_cast_fp16)[name = string("V_layer_cache_1_cast_fp16")]; tensor x_11_axes_0 = const()[name = string("x_11_axes_0"), val = tensor([1])]; tensor x_11_cast_fp16 = expand_dims(axes = x_11_axes_0, x = K_layer_cache_1_cast_fp16)[name = string("x_11_cast_fp16")]; tensor var_280 = const()[name = string("op_280"), val = tensor([1, 4, 1, 1])]; tensor x_13_cast_fp16 = tile(reps = var_280, x = x_11_cast_fp16)[name = string("x_13_cast_fp16")]; tensor var_284 = const()[name = string("op_284"), val = tensor([1, -1, 1024, 128])]; tensor var_285_cast_fp16 = reshape(shape = var_284, x = x_13_cast_fp16)[name = string("op_285_cast_fp16")]; tensor x_17_axes_0 = const()[name = string("x_17_axes_0"), val = tensor([1])]; tensor x_17_cast_fp16 = expand_dims(axes = x_17_axes_0, x = V_layer_cache_1_cast_fp16)[name = string("x_17_cast_fp16")]; tensor var_287 = const()[name = string("op_287"), val = tensor([1, 4, 1, 1])]; tensor x_19_cast_fp16 = tile(reps = var_287, x = x_17_cast_fp16)[name = string("x_19_cast_fp16")]; bool var_294_transpose_x_0 = const()[name = string("op_294_transpose_x_0"), val = bool(false)]; bool var_294_transpose_y_0 = const()[name = string("op_294_transpose_y_0"), val = bool(true)]; tensor var_294_cast_fp16 = matmul(transpose_x = var_294_transpose_x_0, transpose_y = var_294_transpose_y_0, x = rotated_1, y = var_285_cast_fp16)[name = string("op_294_cast_fp16")]; fp16 var_295_to_fp16 = const()[name = string("op_295_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_1_cast_fp16 = mul(x = var_294_cast_fp16, y = var_295_to_fp16)[name = string("attn_weights_1_cast_fp16")]; tensor x_21_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = causal_mask)[name = string("x_21_cast_fp16")]; tensor reduce_max_0_axes_0 = const()[name = string("reduce_max_0_axes_0"), val = tensor([-1])]; bool reduce_max_0_keep_dims_0 = const()[name = string("reduce_max_0_keep_dims_0"), val = bool(true)]; tensor reduce_max_0_cast_fp16 = reduce_max(axes = reduce_max_0_axes_0, keep_dims = reduce_max_0_keep_dims_0, x = x_21_cast_fp16)[name = string("reduce_max_0_cast_fp16")]; tensor x_23_cast_fp16 = sub(x = x_21_cast_fp16, y = reduce_max_0_cast_fp16)[name = string("x_23_cast_fp16")]; tensor exp_x_1_cast_fp16 = exp(x = x_23_cast_fp16)[name = string("exp_x_1_cast_fp16")]; tensor var_306_axes_0 = const()[name = string("op_306_axes_0"), val = tensor([-1])]; bool var_306_keep_dims_0 = const()[name = string("op_306_keep_dims_0"), val = bool(true)]; tensor var_306_cast_fp16 = reduce_sum(axes = var_306_axes_0, keep_dims = var_306_keep_dims_0, x = exp_x_1_cast_fp16)[name = string("op_306_cast_fp16")]; tensor var_307_cast_fp16 = real_div(x = exp_x_1_cast_fp16, y = var_306_cast_fp16)[name = string("op_307_cast_fp16")]; tensor concat_12 = const()[name = string("concat_12"), val = tensor([32, 256, 1024])]; tensor reshape_0_cast_fp16 = reshape(shape = concat_12, x = var_307_cast_fp16)[name = string("reshape_0_cast_fp16")]; tensor concat_13 = const()[name = string("concat_13"), val = tensor([32, 1024, 128])]; tensor reshape_1_cast_fp16 = reshape(shape = concat_13, x = x_19_cast_fp16)[name = string("reshape_1_cast_fp16")]; bool matmul_0_transpose_x_0 = const()[name = string("matmul_0_transpose_x_0"), val = bool(false)]; bool matmul_0_transpose_y_0 = const()[name = string("matmul_0_transpose_y_0"), val = bool(false)]; tensor matmul_0_cast_fp16 = matmul(transpose_x = matmul_0_transpose_x_0, transpose_y = matmul_0_transpose_y_0, x = reshape_0_cast_fp16, y = reshape_1_cast_fp16)[name = string("matmul_0_cast_fp16")]; tensor concat_17 = const()[name = string("concat_17"), val = tensor([1, 32, 256, 128])]; tensor reshape_2_cast_fp16 = reshape(shape = concat_17, x = matmul_0_cast_fp16)[name = string("reshape_2_cast_fp16")]; tensor var_310_perm_0 = const()[name = string("op_310_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_312 = const()[name = string("op_312"), val = tensor([1, 256, 4096])]; tensor var_310_cast_fp16 = transpose(perm = var_310_perm_0, x = reshape_2_cast_fp16)[name = string("transpose_23")]; tensor input_5_cast_fp16 = reshape(shape = var_312, x = var_310_cast_fp16)[name = string("input_5_cast_fp16")]; tensor model_model_layers_4_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(673590528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(686173504))))[name = string("model_model_layers_4_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(686239104)))]; tensor linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_4_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_5_cast_fp16)[name = string("linear_0_cast_fp16")]; tensor hidden_states_5_cast_fp16 = add(x = hidden_states, y = linear_0_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; tensor mean_3_axes_0 = const()[name = string("mean_3_axes_0"), val = tensor([-1])]; bool mean_3_keep_dims_0 = const()[name = string("mean_3_keep_dims_0"), val = bool(true)]; tensor mean_3_cast_fp16 = reduce_mean(axes = mean_3_axes_0, keep_dims = mean_3_keep_dims_0, x = hidden_states_5_cast_fp16)[name = string("mean_3_cast_fp16")]; tensor input_7_cast_fp16 = sub(x = hidden_states_5_cast_fp16, y = mean_3_cast_fp16)[name = string("input_7_cast_fp16")]; tensor var_323_axes_0 = const()[name = string("op_323_axes_0"), val = tensor([-1])]; tensor model_model_layers_4_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_4_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(686247360)))]; tensor var_323_cast_fp16 = layer_norm(axes = var_323_axes_0, epsilon = var_35_to_fp16, gamma = model_model_layers_4_post_attention_layernorm_weight_to_fp16, x = input_7_cast_fp16)[name = string("op_323_cast_fp16")]; tensor var_330 = const()[name = string("op_330"), val = tensor([0, 2, 1])]; tensor input_9_axes_0 = const()[name = string("input_9_axes_0"), val = tensor([2])]; tensor var_331 = transpose(perm = var_330, x = var_323_cast_fp16)[name = string("transpose_22")]; tensor input_9 = expand_dims(axes = input_9_axes_0, x = var_331)[name = string("input_9")]; string input_11_pad_type_0 = const()[name = string("input_11_pad_type_0"), val = string("valid")]; tensor input_11_strides_0 = const()[name = string("input_11_strides_0"), val = tensor([1, 1])]; tensor input_11_pad_0 = const()[name = string("input_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_11_dilations_0 = const()[name = string("input_11_dilations_0"), val = tensor([1, 1])]; int32 input_11_groups_0 = const()[name = string("input_11_groups_0"), val = int32(1)]; tensor input_11 = conv(dilations = input_11_dilations_0, groups = input_11_groups_0, pad = input_11_pad_0, pad_type = input_11_pad_type_0, strides = input_11_strides_0, weight = model_model_layers_4_mlp_gate_proj_weight_palettized, x = input_9)[name = string("input_11")]; string up_states_1_pad_type_0 = const()[name = string("up_states_1_pad_type_0"), val = string("valid")]; tensor up_states_1_strides_0 = const()[name = string("up_states_1_strides_0"), val = tensor([1, 1])]; tensor up_states_1_pad_0 = const()[name = string("up_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_1_dilations_0 = const()[name = string("up_states_1_dilations_0"), val = tensor([1, 1])]; int32 up_states_1_groups_0 = const()[name = string("up_states_1_groups_0"), val = int32(1)]; tensor up_states_1 = conv(dilations = up_states_1_dilations_0, groups = up_states_1_groups_0, pad = up_states_1_pad_0, pad_type = up_states_1_pad_type_0, strides = up_states_1_strides_0, weight = model_model_layers_4_mlp_up_proj_weight_palettized, x = input_9)[name = string("up_states_1")]; tensor gate_states_1 = silu(x = input_11)[name = string("gate_states_1")]; tensor input_13 = mul(x = gate_states_1, y = up_states_1)[name = string("input_13")]; string hidden_states_7_pad_type_0 = const()[name = string("hidden_states_7_pad_type_0"), val = string("valid")]; tensor hidden_states_7_strides_0 = const()[name = string("hidden_states_7_strides_0"), val = tensor([1, 1])]; tensor hidden_states_7_pad_0 = const()[name = string("hidden_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_7_dilations_0 = const()[name = string("hidden_states_7_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_7_groups_0 = const()[name = string("hidden_states_7_groups_0"), val = int32(1)]; tensor hidden_states_7 = conv(dilations = hidden_states_7_dilations_0, groups = hidden_states_7_groups_0, pad = hidden_states_7_pad_0, pad_type = hidden_states_7_pad_type_0, strides = hidden_states_7_strides_0, weight = model_model_layers_4_mlp_down_proj_weight_palettized, x = input_13)[name = string("hidden_states_7")]; tensor var_353_axes_0 = const()[name = string("op_353_axes_0"), val = tensor([2])]; tensor var_353 = squeeze(axes = var_353_axes_0, x = hidden_states_7)[name = string("op_353")]; tensor var_354 = const()[name = string("op_354"), val = tensor([0, 2, 1])]; tensor var_355 = transpose(perm = var_354, x = var_353)[name = string("transpose_21")]; tensor hidden_states_9_cast_fp16 = add(x = hidden_states_5_cast_fp16, y = var_355)[name = string("hidden_states_9_cast_fp16")]; tensor mean_5_axes_0 = const()[name = string("mean_5_axes_0"), val = tensor([-1])]; bool mean_5_keep_dims_0 = const()[name = string("mean_5_keep_dims_0"), val = bool(true)]; tensor mean_5_cast_fp16 = reduce_mean(axes = mean_5_axes_0, keep_dims = mean_5_keep_dims_0, x = hidden_states_9_cast_fp16)[name = string("mean_5_cast_fp16")]; tensor input_15_cast_fp16 = sub(x = hidden_states_9_cast_fp16, y = mean_5_cast_fp16)[name = string("input_15_cast_fp16")]; tensor var_363_axes_0 = const()[name = string("op_363_axes_0"), val = tensor([-1])]; tensor model_model_layers_5_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_5_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(686255616)))]; tensor var_363_cast_fp16 = layer_norm(axes = var_363_axes_0, epsilon = var_35_to_fp16, gamma = model_model_layers_5_input_layernorm_weight_to_fp16, x = input_15_cast_fp16)[name = string("op_363_cast_fp16")]; tensor var_367 = const()[name = string("op_367"), val = tensor([0, 2, 1])]; tensor var_369_axes_0 = const()[name = string("op_369_axes_0"), val = tensor([2])]; tensor var_368 = transpose(perm = var_367, x = var_363_cast_fp16)[name = string("transpose_20")]; tensor var_369 = expand_dims(axes = var_369_axes_0, x = var_368)[name = string("op_369")]; string query_states_5_pad_type_0 = const()[name = string("query_states_5_pad_type_0"), val = string("valid")]; tensor query_states_5_strides_0 = const()[name = string("query_states_5_strides_0"), val = tensor([1, 1])]; tensor query_states_5_pad_0 = const()[name = string("query_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_5_dilations_0 = const()[name = string("query_states_5_dilations_0"), val = tensor([1, 1])]; int32 query_states_5_groups_0 = const()[name = string("query_states_5_groups_0"), val = int32(1)]; tensor query_states_5 = conv(dilations = query_states_5_dilations_0, groups = query_states_5_groups_0, pad = query_states_5_pad_0, pad_type = query_states_5_pad_type_0, strides = query_states_5_strides_0, weight = model_model_layers_5_self_attn_q_proj_weight_palettized, x = var_369)[name = string("query_states_5")]; string key_states_7_pad_type_0 = const()[name = string("key_states_7_pad_type_0"), val = string("valid")]; tensor key_states_7_strides_0 = const()[name = string("key_states_7_strides_0"), val = tensor([1, 1])]; tensor key_states_7_pad_0 = const()[name = string("key_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_7_dilations_0 = const()[name = string("key_states_7_dilations_0"), val = tensor([1, 1])]; int32 key_states_7_groups_0 = const()[name = string("key_states_7_groups_0"), val = int32(1)]; tensor key_states_7 = conv(dilations = key_states_7_dilations_0, groups = key_states_7_groups_0, pad = key_states_7_pad_0, pad_type = key_states_7_pad_type_0, strides = key_states_7_strides_0, weight = model_model_layers_5_self_attn_k_proj_weight_palettized, x = var_369)[name = string("key_states_7")]; string value_states_7_pad_type_0 = const()[name = string("value_states_7_pad_type_0"), val = string("valid")]; tensor value_states_7_strides_0 = const()[name = string("value_states_7_strides_0"), val = tensor([1, 1])]; tensor value_states_7_pad_0 = const()[name = string("value_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_7_dilations_0 = const()[name = string("value_states_7_dilations_0"), val = tensor([1, 1])]; int32 value_states_7_groups_0 = const()[name = string("value_states_7_groups_0"), val = int32(1)]; tensor value_states_7 = conv(dilations = value_states_7_dilations_0, groups = value_states_7_groups_0, pad = value_states_7_pad_0, pad_type = value_states_7_pad_type_0, strides = value_states_7_strides_0, weight = model_model_layers_5_self_attn_v_proj_weight_palettized, x = var_369)[name = string("value_states_7")]; tensor var_389 = const()[name = string("op_389"), val = tensor([1, 32, 128, 256])]; tensor var_390 = reshape(shape = var_389, x = query_states_5)[name = string("op_390")]; tensor var_391 = const()[name = string("op_391"), val = tensor([0, 1, 3, 2])]; tensor var_393 = const()[name = string("op_393"), val = tensor([1, 8, 128, 256])]; tensor var_394 = reshape(shape = var_393, x = key_states_7)[name = string("op_394")]; tensor var_395 = const()[name = string("op_395"), val = tensor([0, 1, 3, 2])]; tensor var_397 = const()[name = string("op_397"), val = tensor([1, 8, 128, 256])]; tensor var_398 = reshape(shape = var_397, x = value_states_7)[name = string("op_398")]; tensor var_399 = const()[name = string("op_399"), val = tensor([0, 1, 3, 2])]; tensor x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor([1, 32, 256, 64])]; tensor x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_29 = transpose(perm = var_391, x = var_390)[name = string("transpose_19")]; tensor x1_5 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = x_29)[name = string("x1_5")]; tensor x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor([1, 32, 256, 128])]; tensor x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_5 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = x_29)[name = string("x2_5")]; tensor var_417 = mul(x = x1_5, y = cos_7)[name = string("op_417")]; tensor var_418 = mul(x = x2_5, y = sin_7)[name = string("op_418")]; tensor var_419 = sub(x = var_417, y = var_418)[name = string("op_419")]; tensor var_420 = mul(x = x2_5, y = cos_7)[name = string("op_420")]; tensor var_421 = mul(x = x1_5, y = sin_7)[name = string("op_421")]; tensor var_422 = add(x = var_420, y = var_421)[name = string("op_422")]; bool rotated_5_interleave_0 = const()[name = string("rotated_5_interleave_0"), val = bool(false)]; tensor rotated_5 = concat(axis = var_33, interleave = rotated_5_interleave_0, values = (var_419, var_422))[name = string("rotated_5")]; tensor x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor([1, 8, 256, 64])]; tensor x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_33 = transpose(perm = var_395, x = var_394)[name = string("transpose_18")]; tensor x1_7 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = x_33)[name = string("x1_7")]; tensor x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor([1, 8, 256, 128])]; tensor x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_7 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = x_33)[name = string("x2_7")]; tensor var_438 = mul(x = x1_7, y = cos_7)[name = string("op_438")]; tensor var_439 = mul(x = x2_7, y = sin_7)[name = string("op_439")]; tensor var_440 = sub(x = var_438, y = var_439)[name = string("op_440")]; tensor var_441 = mul(x = x2_7, y = cos_7)[name = string("op_441")]; tensor var_442 = mul(x = x1_7, y = sin_7)[name = string("op_442")]; tensor var_443 = add(x = var_441, y = var_442)[name = string("op_443")]; bool rotated_7_interleave_0 = const()[name = string("rotated_7_interleave_0"), val = bool(false)]; tensor rotated_7 = concat(axis = var_33, interleave = rotated_7_interleave_0, values = (var_440, var_443))[name = string("rotated_7")]; tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([5])]; tensor expand_dims_13 = const()[name = string("expand_dims_13"), val = tensor([0])]; tensor expand_dims_15 = const()[name = string("expand_dims_15"), val = tensor([0])]; tensor expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor([6])]; int32 concat_20_axis_0 = const()[name = string("concat_20_axis_0"), val = int32(0)]; bool concat_20_interleave_0 = const()[name = string("concat_20_interleave_0"), val = bool(false)]; tensor concat_20 = concat(axis = concat_20_axis_0, interleave = concat_20_interleave_0, values = (expand_dims_12, expand_dims_13, current_pos, expand_dims_15))[name = string("concat_20")]; tensor concat_21_values1_0 = const()[name = string("concat_21_values1_0"), val = tensor([0])]; tensor concat_21_values3_0 = const()[name = string("concat_21_values3_0"), val = tensor([0])]; int32 concat_21_axis_0 = const()[name = string("concat_21_axis_0"), val = int32(0)]; bool concat_21_interleave_0 = const()[name = string("concat_21_interleave_0"), val = bool(false)]; tensor concat_21 = concat(axis = concat_21_axis_0, interleave = concat_21_interleave_0, values = (expand_dims_16, concat_21_values1_0, var_255, concat_21_values3_0))[name = string("concat_21")]; tensor model_model_kv_cache_0_internal_tensor_assign_3_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_20, begin_mask = model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0, end = concat_21, end_mask = model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_3_stride_0, update = rotated_7, x = coreml_update_state_9)[name = string("model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_10_write_state")]; tensor coreml_update_state_10 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_10")]; tensor expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor([37])]; tensor expand_dims_19 = const()[name = string("expand_dims_19"), val = tensor([0])]; tensor expand_dims_21 = const()[name = string("expand_dims_21"), val = tensor([0])]; tensor expand_dims_22 = const()[name = string("expand_dims_22"), val = tensor([38])]; int32 concat_24_axis_0 = const()[name = string("concat_24_axis_0"), val = int32(0)]; bool concat_24_interleave_0 = const()[name = string("concat_24_interleave_0"), val = bool(false)]; tensor concat_24 = concat(axis = concat_24_axis_0, interleave = concat_24_interleave_0, values = (expand_dims_18, expand_dims_19, current_pos, expand_dims_21))[name = string("concat_24")]; tensor concat_25_values1_0 = const()[name = string("concat_25_values1_0"), val = tensor([0])]; tensor concat_25_values3_0 = const()[name = string("concat_25_values3_0"), val = tensor([0])]; int32 concat_25_axis_0 = const()[name = string("concat_25_axis_0"), val = int32(0)]; bool concat_25_interleave_0 = const()[name = string("concat_25_interleave_0"), val = bool(false)]; tensor concat_25 = concat(axis = concat_25_axis_0, interleave = concat_25_interleave_0, values = (expand_dims_22, concat_25_values1_0, var_255, concat_25_values3_0))[name = string("concat_25")]; tensor model_model_kv_cache_0_internal_tensor_assign_4_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_9 = transpose(perm = var_399, x = var_398)[name = string("transpose_17")]; tensor model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_24, begin_mask = model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0, end = concat_25, end_mask = model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_4_stride_0, update = value_states_9, x = coreml_update_state_10)[name = string("model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_11_write_state")]; tensor coreml_update_state_11 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_11")]; tensor var_466_begin_0 = const()[name = string("op_466_begin_0"), val = tensor([5, 0, 0, 0])]; tensor var_466_end_0 = const()[name = string("op_466_end_0"), val = tensor([6, 8, 1024, 128])]; tensor var_466_end_mask_0 = const()[name = string("op_466_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_466_cast_fp16 = slice_by_index(begin = var_466_begin_0, end = var_466_end_0, end_mask = var_466_end_mask_0, x = coreml_update_state_11)[name = string("op_466_cast_fp16")]; tensor K_layer_cache_3_axes_0 = const()[name = string("K_layer_cache_3_axes_0"), val = tensor([0])]; tensor K_layer_cache_3_cast_fp16 = squeeze(axes = K_layer_cache_3_axes_0, x = var_466_cast_fp16)[name = string("K_layer_cache_3_cast_fp16")]; tensor var_468_begin_0 = const()[name = string("op_468_begin_0"), val = tensor([37, 0, 0, 0])]; tensor var_468_end_0 = const()[name = string("op_468_end_0"), val = tensor([38, 8, 1024, 128])]; tensor var_468_end_mask_0 = const()[name = string("op_468_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_468_cast_fp16 = slice_by_index(begin = var_468_begin_0, end = var_468_end_0, end_mask = var_468_end_mask_0, x = coreml_update_state_11)[name = string("op_468_cast_fp16")]; tensor V_layer_cache_3_axes_0 = const()[name = string("V_layer_cache_3_axes_0"), val = tensor([0])]; tensor V_layer_cache_3_cast_fp16 = squeeze(axes = V_layer_cache_3_axes_0, x = var_468_cast_fp16)[name = string("V_layer_cache_3_cast_fp16")]; tensor x_39_axes_0 = const()[name = string("x_39_axes_0"), val = tensor([1])]; tensor x_39_cast_fp16 = expand_dims(axes = x_39_axes_0, x = K_layer_cache_3_cast_fp16)[name = string("x_39_cast_fp16")]; tensor var_477 = const()[name = string("op_477"), val = tensor([1, 4, 1, 1])]; tensor x_41_cast_fp16 = tile(reps = var_477, x = x_39_cast_fp16)[name = string("x_41_cast_fp16")]; tensor var_481 = const()[name = string("op_481"), val = tensor([1, -1, 1024, 128])]; tensor var_482_cast_fp16 = reshape(shape = var_481, x = x_41_cast_fp16)[name = string("op_482_cast_fp16")]; tensor x_45_axes_0 = const()[name = string("x_45_axes_0"), val = tensor([1])]; tensor x_45_cast_fp16 = expand_dims(axes = x_45_axes_0, x = V_layer_cache_3_cast_fp16)[name = string("x_45_cast_fp16")]; tensor var_484 = const()[name = string("op_484"), val = tensor([1, 4, 1, 1])]; tensor x_47_cast_fp16 = tile(reps = var_484, x = x_45_cast_fp16)[name = string("x_47_cast_fp16")]; bool var_491_transpose_x_0 = const()[name = string("op_491_transpose_x_0"), val = bool(false)]; bool var_491_transpose_y_0 = const()[name = string("op_491_transpose_y_0"), val = bool(true)]; tensor var_491_cast_fp16 = matmul(transpose_x = var_491_transpose_x_0, transpose_y = var_491_transpose_y_0, x = rotated_5, y = var_482_cast_fp16)[name = string("op_491_cast_fp16")]; fp16 var_492_to_fp16 = const()[name = string("op_492_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_3_cast_fp16 = mul(x = var_491_cast_fp16, y = var_492_to_fp16)[name = string("attn_weights_3_cast_fp16")]; tensor x_49_cast_fp16 = add(x = attn_weights_3_cast_fp16, y = causal_mask)[name = string("x_49_cast_fp16")]; tensor reduce_max_1_axes_0 = const()[name = string("reduce_max_1_axes_0"), val = tensor([-1])]; bool reduce_max_1_keep_dims_0 = const()[name = string("reduce_max_1_keep_dims_0"), val = bool(true)]; tensor reduce_max_1_cast_fp16 = reduce_max(axes = reduce_max_1_axes_0, keep_dims = reduce_max_1_keep_dims_0, x = x_49_cast_fp16)[name = string("reduce_max_1_cast_fp16")]; tensor x_51_cast_fp16 = sub(x = x_49_cast_fp16, y = reduce_max_1_cast_fp16)[name = string("x_51_cast_fp16")]; tensor exp_x_3_cast_fp16 = exp(x = x_51_cast_fp16)[name = string("exp_x_3_cast_fp16")]; tensor var_503_axes_0 = const()[name = string("op_503_axes_0"), val = tensor([-1])]; bool var_503_keep_dims_0 = const()[name = string("op_503_keep_dims_0"), val = bool(true)]; tensor var_503_cast_fp16 = reduce_sum(axes = var_503_axes_0, keep_dims = var_503_keep_dims_0, x = exp_x_3_cast_fp16)[name = string("op_503_cast_fp16")]; tensor var_504_cast_fp16 = real_div(x = exp_x_3_cast_fp16, y = var_503_cast_fp16)[name = string("op_504_cast_fp16")]; tensor concat_30 = const()[name = string("concat_30"), val = tensor([32, 256, 1024])]; tensor reshape_3_cast_fp16 = reshape(shape = concat_30, x = var_504_cast_fp16)[name = string("reshape_3_cast_fp16")]; tensor concat_31 = const()[name = string("concat_31"), val = tensor([32, 1024, 128])]; tensor reshape_4_cast_fp16 = reshape(shape = concat_31, x = x_47_cast_fp16)[name = string("reshape_4_cast_fp16")]; bool matmul_1_transpose_x_0 = const()[name = string("matmul_1_transpose_x_0"), val = bool(false)]; bool matmul_1_transpose_y_0 = const()[name = string("matmul_1_transpose_y_0"), val = bool(false)]; tensor matmul_1_cast_fp16 = matmul(transpose_x = matmul_1_transpose_x_0, transpose_y = matmul_1_transpose_y_0, x = reshape_3_cast_fp16, y = reshape_4_cast_fp16)[name = string("matmul_1_cast_fp16")]; tensor concat_35 = const()[name = string("concat_35"), val = tensor([1, 32, 256, 128])]; tensor reshape_5_cast_fp16 = reshape(shape = concat_35, x = matmul_1_cast_fp16)[name = string("reshape_5_cast_fp16")]; tensor var_507_perm_0 = const()[name = string("op_507_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_509 = const()[name = string("op_509"), val = tensor([1, 256, 4096])]; tensor var_507_cast_fp16 = transpose(perm = var_507_perm_0, x = reshape_5_cast_fp16)[name = string("transpose_16")]; tensor input_19_cast_fp16 = reshape(shape = var_509, x = var_507_cast_fp16)[name = string("input_19_cast_fp16")]; tensor model_model_layers_5_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(686263872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698846848))))[name = string("model_model_layers_5_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_1_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_5_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_19_cast_fp16)[name = string("linear_1_cast_fp16")]; tensor hidden_states_13_cast_fp16 = add(x = hidden_states_9_cast_fp16, y = linear_1_cast_fp16)[name = string("hidden_states_13_cast_fp16")]; tensor mean_7_axes_0 = const()[name = string("mean_7_axes_0"), val = tensor([-1])]; bool mean_7_keep_dims_0 = const()[name = string("mean_7_keep_dims_0"), val = bool(true)]; tensor mean_7_cast_fp16 = reduce_mean(axes = mean_7_axes_0, keep_dims = mean_7_keep_dims_0, x = hidden_states_13_cast_fp16)[name = string("mean_7_cast_fp16")]; tensor input_21_cast_fp16 = sub(x = hidden_states_13_cast_fp16, y = mean_7_cast_fp16)[name = string("input_21_cast_fp16")]; tensor var_520_axes_0 = const()[name = string("op_520_axes_0"), val = tensor([-1])]; tensor model_model_layers_5_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_5_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698912448)))]; tensor var_520_cast_fp16 = layer_norm(axes = var_520_axes_0, epsilon = var_35_to_fp16, gamma = model_model_layers_5_post_attention_layernorm_weight_to_fp16, x = input_21_cast_fp16)[name = string("op_520_cast_fp16")]; tensor var_527 = const()[name = string("op_527"), val = tensor([0, 2, 1])]; tensor input_23_axes_0 = const()[name = string("input_23_axes_0"), val = tensor([2])]; tensor var_528 = transpose(perm = var_527, x = var_520_cast_fp16)[name = string("transpose_15")]; tensor input_23 = expand_dims(axes = input_23_axes_0, x = var_528)[name = string("input_23")]; string input_25_pad_type_0 = const()[name = string("input_25_pad_type_0"), val = string("valid")]; tensor input_25_strides_0 = const()[name = string("input_25_strides_0"), val = tensor([1, 1])]; tensor input_25_pad_0 = const()[name = string("input_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_25_dilations_0 = const()[name = string("input_25_dilations_0"), val = tensor([1, 1])]; int32 input_25_groups_0 = const()[name = string("input_25_groups_0"), val = int32(1)]; tensor input_25 = conv(dilations = input_25_dilations_0, groups = input_25_groups_0, pad = input_25_pad_0, pad_type = input_25_pad_type_0, strides = input_25_strides_0, weight = model_model_layers_5_mlp_gate_proj_weight_palettized, x = input_23)[name = string("input_25")]; string up_states_3_pad_type_0 = const()[name = string("up_states_3_pad_type_0"), val = string("valid")]; tensor up_states_3_strides_0 = const()[name = string("up_states_3_strides_0"), val = tensor([1, 1])]; tensor up_states_3_pad_0 = const()[name = string("up_states_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_3_dilations_0 = const()[name = string("up_states_3_dilations_0"), val = tensor([1, 1])]; int32 up_states_3_groups_0 = const()[name = string("up_states_3_groups_0"), val = int32(1)]; tensor up_states_3 = conv(dilations = up_states_3_dilations_0, groups = up_states_3_groups_0, pad = up_states_3_pad_0, pad_type = up_states_3_pad_type_0, strides = up_states_3_strides_0, weight = model_model_layers_5_mlp_up_proj_weight_palettized, x = input_23)[name = string("up_states_3")]; tensor gate_states_3 = silu(x = input_25)[name = string("gate_states_3")]; tensor input_27 = mul(x = gate_states_3, y = up_states_3)[name = string("input_27")]; string hidden_states_15_pad_type_0 = const()[name = string("hidden_states_15_pad_type_0"), val = string("valid")]; tensor hidden_states_15_strides_0 = const()[name = string("hidden_states_15_strides_0"), val = tensor([1, 1])]; tensor hidden_states_15_pad_0 = const()[name = string("hidden_states_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_15_dilations_0 = const()[name = string("hidden_states_15_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_15_groups_0 = const()[name = string("hidden_states_15_groups_0"), val = int32(1)]; tensor hidden_states_15 = conv(dilations = hidden_states_15_dilations_0, groups = hidden_states_15_groups_0, pad = hidden_states_15_pad_0, pad_type = hidden_states_15_pad_type_0, strides = hidden_states_15_strides_0, weight = model_model_layers_5_mlp_down_proj_weight_palettized, x = input_27)[name = string("hidden_states_15")]; tensor var_550_axes_0 = const()[name = string("op_550_axes_0"), val = tensor([2])]; tensor var_550 = squeeze(axes = var_550_axes_0, x = hidden_states_15)[name = string("op_550")]; tensor var_551 = const()[name = string("op_551"), val = tensor([0, 2, 1])]; tensor var_552 = transpose(perm = var_551, x = var_550)[name = string("transpose_14")]; tensor hidden_states_17_cast_fp16 = add(x = hidden_states_13_cast_fp16, y = var_552)[name = string("hidden_states_17_cast_fp16")]; tensor mean_9_axes_0 = const()[name = string("mean_9_axes_0"), val = tensor([-1])]; bool mean_9_keep_dims_0 = const()[name = string("mean_9_keep_dims_0"), val = bool(true)]; tensor mean_9_cast_fp16 = reduce_mean(axes = mean_9_axes_0, keep_dims = mean_9_keep_dims_0, x = hidden_states_17_cast_fp16)[name = string("mean_9_cast_fp16")]; tensor input_29_cast_fp16 = sub(x = hidden_states_17_cast_fp16, y = mean_9_cast_fp16)[name = string("input_29_cast_fp16")]; tensor var_560_axes_0 = const()[name = string("op_560_axes_0"), val = tensor([-1])]; tensor model_model_layers_6_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_6_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698920704)))]; tensor var_560_cast_fp16 = layer_norm(axes = var_560_axes_0, epsilon = var_35_to_fp16, gamma = model_model_layers_6_input_layernorm_weight_to_fp16, x = input_29_cast_fp16)[name = string("op_560_cast_fp16")]; tensor var_564 = const()[name = string("op_564"), val = tensor([0, 2, 1])]; tensor var_566_axes_0 = const()[name = string("op_566_axes_0"), val = tensor([2])]; tensor var_565 = transpose(perm = var_564, x = var_560_cast_fp16)[name = string("transpose_13")]; tensor var_566 = expand_dims(axes = var_566_axes_0, x = var_565)[name = string("op_566")]; string query_states_9_pad_type_0 = const()[name = string("query_states_9_pad_type_0"), val = string("valid")]; tensor query_states_9_strides_0 = const()[name = string("query_states_9_strides_0"), val = tensor([1, 1])]; tensor query_states_9_pad_0 = const()[name = string("query_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_9_dilations_0 = const()[name = string("query_states_9_dilations_0"), val = tensor([1, 1])]; int32 query_states_9_groups_0 = const()[name = string("query_states_9_groups_0"), val = int32(1)]; tensor query_states_9 = conv(dilations = query_states_9_dilations_0, groups = query_states_9_groups_0, pad = query_states_9_pad_0, pad_type = query_states_9_pad_type_0, strides = query_states_9_strides_0, weight = model_model_layers_6_self_attn_q_proj_weight_palettized, x = var_566)[name = string("query_states_9")]; string key_states_13_pad_type_0 = const()[name = string("key_states_13_pad_type_0"), val = string("valid")]; tensor key_states_13_strides_0 = const()[name = string("key_states_13_strides_0"), val = tensor([1, 1])]; tensor key_states_13_pad_0 = const()[name = string("key_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_13_dilations_0 = const()[name = string("key_states_13_dilations_0"), val = tensor([1, 1])]; int32 key_states_13_groups_0 = const()[name = string("key_states_13_groups_0"), val = int32(1)]; tensor key_states_13 = conv(dilations = key_states_13_dilations_0, groups = key_states_13_groups_0, pad = key_states_13_pad_0, pad_type = key_states_13_pad_type_0, strides = key_states_13_strides_0, weight = model_model_layers_6_self_attn_k_proj_weight_palettized, x = var_566)[name = string("key_states_13")]; string value_states_13_pad_type_0 = const()[name = string("value_states_13_pad_type_0"), val = string("valid")]; tensor value_states_13_strides_0 = const()[name = string("value_states_13_strides_0"), val = tensor([1, 1])]; tensor value_states_13_pad_0 = const()[name = string("value_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_13_dilations_0 = const()[name = string("value_states_13_dilations_0"), val = tensor([1, 1])]; int32 value_states_13_groups_0 = const()[name = string("value_states_13_groups_0"), val = int32(1)]; tensor value_states_13 = conv(dilations = value_states_13_dilations_0, groups = value_states_13_groups_0, pad = value_states_13_pad_0, pad_type = value_states_13_pad_type_0, strides = value_states_13_strides_0, weight = model_model_layers_6_self_attn_v_proj_weight_palettized, x = var_566)[name = string("value_states_13")]; tensor var_586 = const()[name = string("op_586"), val = tensor([1, 32, 128, 256])]; tensor var_587 = reshape(shape = var_586, x = query_states_9)[name = string("op_587")]; tensor var_588 = const()[name = string("op_588"), val = tensor([0, 1, 3, 2])]; tensor var_590 = const()[name = string("op_590"), val = tensor([1, 8, 128, 256])]; tensor var_591 = reshape(shape = var_590, x = key_states_13)[name = string("op_591")]; tensor var_592 = const()[name = string("op_592"), val = tensor([0, 1, 3, 2])]; tensor var_594 = const()[name = string("op_594"), val = tensor([1, 8, 128, 256])]; tensor var_595 = reshape(shape = var_594, x = value_states_13)[name = string("op_595")]; tensor var_596 = const()[name = string("op_596"), val = tensor([0, 1, 3, 2])]; tensor x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor([1, 32, 256, 64])]; tensor x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_57 = transpose(perm = var_588, x = var_587)[name = string("transpose_12")]; tensor x1_9 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = x_57)[name = string("x1_9")]; tensor x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor([1, 32, 256, 128])]; tensor x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_9 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = x_57)[name = string("x2_9")]; tensor var_614 = mul(x = x1_9, y = cos_7)[name = string("op_614")]; tensor var_615 = mul(x = x2_9, y = sin_7)[name = string("op_615")]; tensor var_616 = sub(x = var_614, y = var_615)[name = string("op_616")]; tensor var_617 = mul(x = x2_9, y = cos_7)[name = string("op_617")]; tensor var_618 = mul(x = x1_9, y = sin_7)[name = string("op_618")]; tensor var_619 = add(x = var_617, y = var_618)[name = string("op_619")]; bool rotated_9_interleave_0 = const()[name = string("rotated_9_interleave_0"), val = bool(false)]; tensor rotated_9 = concat(axis = var_33, interleave = rotated_9_interleave_0, values = (var_616, var_619))[name = string("rotated_9")]; tensor x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor([1, 8, 256, 64])]; tensor x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_61 = transpose(perm = var_592, x = var_591)[name = string("transpose_11")]; tensor x1_11 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = x_61)[name = string("x1_11")]; tensor x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor([1, 8, 256, 128])]; tensor x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_11 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = x_61)[name = string("x2_11")]; tensor var_635 = mul(x = x1_11, y = cos_7)[name = string("op_635")]; tensor var_636 = mul(x = x2_11, y = sin_7)[name = string("op_636")]; tensor var_637 = sub(x = var_635, y = var_636)[name = string("op_637")]; tensor var_638 = mul(x = x2_11, y = cos_7)[name = string("op_638")]; tensor var_639 = mul(x = x1_11, y = sin_7)[name = string("op_639")]; tensor var_640 = add(x = var_638, y = var_639)[name = string("op_640")]; bool rotated_11_interleave_0 = const()[name = string("rotated_11_interleave_0"), val = bool(false)]; tensor rotated_11 = concat(axis = var_33, interleave = rotated_11_interleave_0, values = (var_637, var_640))[name = string("rotated_11")]; tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([6])]; tensor expand_dims_25 = const()[name = string("expand_dims_25"), val = tensor([0])]; tensor expand_dims_27 = const()[name = string("expand_dims_27"), val = tensor([0])]; tensor expand_dims_28 = const()[name = string("expand_dims_28"), val = tensor([7])]; int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)]; bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)]; tensor concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (expand_dims_24, expand_dims_25, current_pos, expand_dims_27))[name = string("concat_38")]; tensor concat_39_values1_0 = const()[name = string("concat_39_values1_0"), val = tensor([0])]; tensor concat_39_values3_0 = const()[name = string("concat_39_values3_0"), val = tensor([0])]; int32 concat_39_axis_0 = const()[name = string("concat_39_axis_0"), val = int32(0)]; bool concat_39_interleave_0 = const()[name = string("concat_39_interleave_0"), val = bool(false)]; tensor concat_39 = concat(axis = concat_39_axis_0, interleave = concat_39_interleave_0, values = (expand_dims_28, concat_39_values1_0, var_255, concat_39_values3_0))[name = string("concat_39")]; tensor model_model_kv_cache_0_internal_tensor_assign_5_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_38, begin_mask = model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0, end = concat_39, end_mask = model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_5_stride_0, update = rotated_11, x = coreml_update_state_11)[name = string("model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_12_write_state")]; tensor coreml_update_state_12 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_12")]; tensor expand_dims_30 = const()[name = string("expand_dims_30"), val = tensor([38])]; tensor expand_dims_31 = const()[name = string("expand_dims_31"), val = tensor([0])]; tensor expand_dims_33 = const()[name = string("expand_dims_33"), val = tensor([0])]; tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([39])]; int32 concat_42_axis_0 = const()[name = string("concat_42_axis_0"), val = int32(0)]; bool concat_42_interleave_0 = const()[name = string("concat_42_interleave_0"), val = bool(false)]; tensor concat_42 = concat(axis = concat_42_axis_0, interleave = concat_42_interleave_0, values = (expand_dims_30, expand_dims_31, current_pos, expand_dims_33))[name = string("concat_42")]; tensor concat_43_values1_0 = const()[name = string("concat_43_values1_0"), val = tensor([0])]; tensor concat_43_values3_0 = const()[name = string("concat_43_values3_0"), val = tensor([0])]; int32 concat_43_axis_0 = const()[name = string("concat_43_axis_0"), val = int32(0)]; bool concat_43_interleave_0 = const()[name = string("concat_43_interleave_0"), val = bool(false)]; tensor concat_43 = concat(axis = concat_43_axis_0, interleave = concat_43_interleave_0, values = (expand_dims_34, concat_43_values1_0, var_255, concat_43_values3_0))[name = string("concat_43")]; tensor model_model_kv_cache_0_internal_tensor_assign_6_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_15 = transpose(perm = var_596, x = var_595)[name = string("transpose_10")]; tensor model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_42, begin_mask = model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0, end = concat_43, end_mask = model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_6_stride_0, update = value_states_15, x = coreml_update_state_12)[name = string("model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_13_write_state")]; tensor coreml_update_state_13 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_13")]; tensor var_663_begin_0 = const()[name = string("op_663_begin_0"), val = tensor([6, 0, 0, 0])]; tensor var_663_end_0 = const()[name = string("op_663_end_0"), val = tensor([7, 8, 1024, 128])]; tensor var_663_end_mask_0 = const()[name = string("op_663_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_663_cast_fp16 = slice_by_index(begin = var_663_begin_0, end = var_663_end_0, end_mask = var_663_end_mask_0, x = coreml_update_state_13)[name = string("op_663_cast_fp16")]; tensor K_layer_cache_5_axes_0 = const()[name = string("K_layer_cache_5_axes_0"), val = tensor([0])]; tensor K_layer_cache_5_cast_fp16 = squeeze(axes = K_layer_cache_5_axes_0, x = var_663_cast_fp16)[name = string("K_layer_cache_5_cast_fp16")]; tensor var_665_begin_0 = const()[name = string("op_665_begin_0"), val = tensor([38, 0, 0, 0])]; tensor var_665_end_0 = const()[name = string("op_665_end_0"), val = tensor([39, 8, 1024, 128])]; tensor var_665_end_mask_0 = const()[name = string("op_665_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_665_cast_fp16 = slice_by_index(begin = var_665_begin_0, end = var_665_end_0, end_mask = var_665_end_mask_0, x = coreml_update_state_13)[name = string("op_665_cast_fp16")]; tensor V_layer_cache_5_axes_0 = const()[name = string("V_layer_cache_5_axes_0"), val = tensor([0])]; tensor V_layer_cache_5_cast_fp16 = squeeze(axes = V_layer_cache_5_axes_0, x = var_665_cast_fp16)[name = string("V_layer_cache_5_cast_fp16")]; tensor x_67_axes_0 = const()[name = string("x_67_axes_0"), val = tensor([1])]; tensor x_67_cast_fp16 = expand_dims(axes = x_67_axes_0, x = K_layer_cache_5_cast_fp16)[name = string("x_67_cast_fp16")]; tensor var_674 = const()[name = string("op_674"), val = tensor([1, 4, 1, 1])]; tensor x_69_cast_fp16 = tile(reps = var_674, x = x_67_cast_fp16)[name = string("x_69_cast_fp16")]; tensor var_678 = const()[name = string("op_678"), val = tensor([1, -1, 1024, 128])]; tensor var_679_cast_fp16 = reshape(shape = var_678, x = x_69_cast_fp16)[name = string("op_679_cast_fp16")]; tensor x_73_axes_0 = const()[name = string("x_73_axes_0"), val = tensor([1])]; tensor x_73_cast_fp16 = expand_dims(axes = x_73_axes_0, x = V_layer_cache_5_cast_fp16)[name = string("x_73_cast_fp16")]; tensor var_681 = const()[name = string("op_681"), val = tensor([1, 4, 1, 1])]; tensor x_75_cast_fp16 = tile(reps = var_681, x = x_73_cast_fp16)[name = string("x_75_cast_fp16")]; bool var_688_transpose_x_0 = const()[name = string("op_688_transpose_x_0"), val = bool(false)]; bool var_688_transpose_y_0 = const()[name = string("op_688_transpose_y_0"), val = bool(true)]; tensor var_688_cast_fp16 = matmul(transpose_x = var_688_transpose_x_0, transpose_y = var_688_transpose_y_0, x = rotated_9, y = var_679_cast_fp16)[name = string("op_688_cast_fp16")]; fp16 var_689_to_fp16 = const()[name = string("op_689_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_5_cast_fp16 = mul(x = var_688_cast_fp16, y = var_689_to_fp16)[name = string("attn_weights_5_cast_fp16")]; tensor x_77_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = causal_mask)[name = string("x_77_cast_fp16")]; tensor reduce_max_2_axes_0 = const()[name = string("reduce_max_2_axes_0"), val = tensor([-1])]; bool reduce_max_2_keep_dims_0 = const()[name = string("reduce_max_2_keep_dims_0"), val = bool(true)]; tensor reduce_max_2_cast_fp16 = reduce_max(axes = reduce_max_2_axes_0, keep_dims = reduce_max_2_keep_dims_0, x = x_77_cast_fp16)[name = string("reduce_max_2_cast_fp16")]; tensor x_79_cast_fp16 = sub(x = x_77_cast_fp16, y = reduce_max_2_cast_fp16)[name = string("x_79_cast_fp16")]; tensor exp_x_5_cast_fp16 = exp(x = x_79_cast_fp16)[name = string("exp_x_5_cast_fp16")]; tensor var_700_axes_0 = const()[name = string("op_700_axes_0"), val = tensor([-1])]; bool var_700_keep_dims_0 = const()[name = string("op_700_keep_dims_0"), val = bool(true)]; tensor var_700_cast_fp16 = reduce_sum(axes = var_700_axes_0, keep_dims = var_700_keep_dims_0, x = exp_x_5_cast_fp16)[name = string("op_700_cast_fp16")]; tensor var_701_cast_fp16 = real_div(x = exp_x_5_cast_fp16, y = var_700_cast_fp16)[name = string("op_701_cast_fp16")]; tensor concat_48 = const()[name = string("concat_48"), val = tensor([32, 256, 1024])]; tensor reshape_6_cast_fp16 = reshape(shape = concat_48, x = var_701_cast_fp16)[name = string("reshape_6_cast_fp16")]; tensor concat_49 = const()[name = string("concat_49"), val = tensor([32, 1024, 128])]; tensor reshape_7_cast_fp16 = reshape(shape = concat_49, x = x_75_cast_fp16)[name = string("reshape_7_cast_fp16")]; bool matmul_2_transpose_x_0 = const()[name = string("matmul_2_transpose_x_0"), val = bool(false)]; bool matmul_2_transpose_y_0 = const()[name = string("matmul_2_transpose_y_0"), val = bool(false)]; tensor matmul_2_cast_fp16 = matmul(transpose_x = matmul_2_transpose_x_0, transpose_y = matmul_2_transpose_y_0, x = reshape_6_cast_fp16, y = reshape_7_cast_fp16)[name = string("matmul_2_cast_fp16")]; tensor concat_53 = const()[name = string("concat_53"), val = tensor([1, 32, 256, 128])]; tensor reshape_8_cast_fp16 = reshape(shape = concat_53, x = matmul_2_cast_fp16)[name = string("reshape_8_cast_fp16")]; tensor var_704_perm_0 = const()[name = string("op_704_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_706 = const()[name = string("op_706"), val = tensor([1, 256, 4096])]; tensor var_704_cast_fp16 = transpose(perm = var_704_perm_0, x = reshape_8_cast_fp16)[name = string("transpose_9")]; tensor input_33_cast_fp16 = reshape(shape = var_706, x = var_704_cast_fp16)[name = string("input_33_cast_fp16")]; tensor model_model_layers_6_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698928960))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(711511936))))[name = string("model_model_layers_6_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_2_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_6_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_33_cast_fp16)[name = string("linear_2_cast_fp16")]; tensor hidden_states_21_cast_fp16 = add(x = hidden_states_17_cast_fp16, y = linear_2_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; tensor mean_11_axes_0 = const()[name = string("mean_11_axes_0"), val = tensor([-1])]; bool mean_11_keep_dims_0 = const()[name = string("mean_11_keep_dims_0"), val = bool(true)]; tensor mean_11_cast_fp16 = reduce_mean(axes = mean_11_axes_0, keep_dims = mean_11_keep_dims_0, x = hidden_states_21_cast_fp16)[name = string("mean_11_cast_fp16")]; tensor input_35_cast_fp16 = sub(x = hidden_states_21_cast_fp16, y = mean_11_cast_fp16)[name = string("input_35_cast_fp16")]; tensor var_717_axes_0 = const()[name = string("op_717_axes_0"), val = tensor([-1])]; tensor model_model_layers_6_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_6_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(711577536)))]; tensor var_717_cast_fp16 = layer_norm(axes = var_717_axes_0, epsilon = var_35_to_fp16, gamma = model_model_layers_6_post_attention_layernorm_weight_to_fp16, x = input_35_cast_fp16)[name = string("op_717_cast_fp16")]; tensor var_724 = const()[name = string("op_724"), val = tensor([0, 2, 1])]; tensor input_37_axes_0 = const()[name = string("input_37_axes_0"), val = tensor([2])]; tensor var_725 = transpose(perm = var_724, x = var_717_cast_fp16)[name = string("transpose_8")]; tensor input_37 = expand_dims(axes = input_37_axes_0, x = var_725)[name = string("input_37")]; string input_39_pad_type_0 = const()[name = string("input_39_pad_type_0"), val = string("valid")]; tensor input_39_strides_0 = const()[name = string("input_39_strides_0"), val = tensor([1, 1])]; tensor input_39_pad_0 = const()[name = string("input_39_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_39_dilations_0 = const()[name = string("input_39_dilations_0"), val = tensor([1, 1])]; int32 input_39_groups_0 = const()[name = string("input_39_groups_0"), val = int32(1)]; tensor input_39 = conv(dilations = input_39_dilations_0, groups = input_39_groups_0, pad = input_39_pad_0, pad_type = input_39_pad_type_0, strides = input_39_strides_0, weight = model_model_layers_6_mlp_gate_proj_weight_palettized, x = input_37)[name = string("input_39")]; string up_states_5_pad_type_0 = const()[name = string("up_states_5_pad_type_0"), val = string("valid")]; tensor up_states_5_strides_0 = const()[name = string("up_states_5_strides_0"), val = tensor([1, 1])]; tensor up_states_5_pad_0 = const()[name = string("up_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_5_dilations_0 = const()[name = string("up_states_5_dilations_0"), val = tensor([1, 1])]; int32 up_states_5_groups_0 = const()[name = string("up_states_5_groups_0"), val = int32(1)]; tensor up_states_5 = conv(dilations = up_states_5_dilations_0, groups = up_states_5_groups_0, pad = up_states_5_pad_0, pad_type = up_states_5_pad_type_0, strides = up_states_5_strides_0, weight = model_model_layers_6_mlp_up_proj_weight_palettized, x = input_37)[name = string("up_states_5")]; tensor gate_states_5 = silu(x = input_39)[name = string("gate_states_5")]; tensor input_41 = mul(x = gate_states_5, y = up_states_5)[name = string("input_41")]; string hidden_states_23_pad_type_0 = const()[name = string("hidden_states_23_pad_type_0"), val = string("valid")]; tensor hidden_states_23_strides_0 = const()[name = string("hidden_states_23_strides_0"), val = tensor([1, 1])]; tensor hidden_states_23_pad_0 = const()[name = string("hidden_states_23_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_23_dilations_0 = const()[name = string("hidden_states_23_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_23_groups_0 = const()[name = string("hidden_states_23_groups_0"), val = int32(1)]; tensor hidden_states_23 = conv(dilations = hidden_states_23_dilations_0, groups = hidden_states_23_groups_0, pad = hidden_states_23_pad_0, pad_type = hidden_states_23_pad_type_0, strides = hidden_states_23_strides_0, weight = model_model_layers_6_mlp_down_proj_weight_palettized, x = input_41)[name = string("hidden_states_23")]; tensor var_747_axes_0 = const()[name = string("op_747_axes_0"), val = tensor([2])]; tensor var_747 = squeeze(axes = var_747_axes_0, x = hidden_states_23)[name = string("op_747")]; tensor var_748 = const()[name = string("op_748"), val = tensor([0, 2, 1])]; tensor var_749 = transpose(perm = var_748, x = var_747)[name = string("transpose_7")]; tensor hidden_states_25_cast_fp16 = add(x = hidden_states_21_cast_fp16, y = var_749)[name = string("hidden_states_25_cast_fp16")]; tensor mean_13_axes_0 = const()[name = string("mean_13_axes_0"), val = tensor([-1])]; bool mean_13_keep_dims_0 = const()[name = string("mean_13_keep_dims_0"), val = bool(true)]; tensor mean_13_cast_fp16 = reduce_mean(axes = mean_13_axes_0, keep_dims = mean_13_keep_dims_0, x = hidden_states_25_cast_fp16)[name = string("mean_13_cast_fp16")]; tensor input_43_cast_fp16 = sub(x = hidden_states_25_cast_fp16, y = mean_13_cast_fp16)[name = string("input_43_cast_fp16")]; tensor var_757_axes_0 = const()[name = string("op_757_axes_0"), val = tensor([-1])]; tensor model_model_layers_7_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_7_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(711585792)))]; tensor var_757_cast_fp16 = layer_norm(axes = var_757_axes_0, epsilon = var_35_to_fp16, gamma = model_model_layers_7_input_layernorm_weight_to_fp16, x = input_43_cast_fp16)[name = string("op_757_cast_fp16")]; tensor var_761 = const()[name = string("op_761"), val = tensor([0, 2, 1])]; tensor var_763_axes_0 = const()[name = string("op_763_axes_0"), val = tensor([2])]; tensor var_762 = transpose(perm = var_761, x = var_757_cast_fp16)[name = string("transpose_6")]; tensor var_763 = expand_dims(axes = var_763_axes_0, x = var_762)[name = string("op_763")]; string query_states_13_pad_type_0 = const()[name = string("query_states_13_pad_type_0"), val = string("valid")]; tensor query_states_13_strides_0 = const()[name = string("query_states_13_strides_0"), val = tensor([1, 1])]; tensor query_states_13_pad_0 = const()[name = string("query_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_13_dilations_0 = const()[name = string("query_states_13_dilations_0"), val = tensor([1, 1])]; int32 query_states_13_groups_0 = const()[name = string("query_states_13_groups_0"), val = int32(1)]; tensor query_states_13 = conv(dilations = query_states_13_dilations_0, groups = query_states_13_groups_0, pad = query_states_13_pad_0, pad_type = query_states_13_pad_type_0, strides = query_states_13_strides_0, weight = model_model_layers_7_self_attn_q_proj_weight_palettized, x = var_763)[name = string("query_states_13")]; string key_states_19_pad_type_0 = const()[name = string("key_states_19_pad_type_0"), val = string("valid")]; tensor key_states_19_strides_0 = const()[name = string("key_states_19_strides_0"), val = tensor([1, 1])]; tensor key_states_19_pad_0 = const()[name = string("key_states_19_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_19_dilations_0 = const()[name = string("key_states_19_dilations_0"), val = tensor([1, 1])]; int32 key_states_19_groups_0 = const()[name = string("key_states_19_groups_0"), val = int32(1)]; tensor key_states_19 = conv(dilations = key_states_19_dilations_0, groups = key_states_19_groups_0, pad = key_states_19_pad_0, pad_type = key_states_19_pad_type_0, strides = key_states_19_strides_0, weight = model_model_layers_7_self_attn_k_proj_weight_palettized, x = var_763)[name = string("key_states_19")]; string value_states_19_pad_type_0 = const()[name = string("value_states_19_pad_type_0"), val = string("valid")]; tensor value_states_19_strides_0 = const()[name = string("value_states_19_strides_0"), val = tensor([1, 1])]; tensor value_states_19_pad_0 = const()[name = string("value_states_19_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_19_dilations_0 = const()[name = string("value_states_19_dilations_0"), val = tensor([1, 1])]; int32 value_states_19_groups_0 = const()[name = string("value_states_19_groups_0"), val = int32(1)]; tensor value_states_19 = conv(dilations = value_states_19_dilations_0, groups = value_states_19_groups_0, pad = value_states_19_pad_0, pad_type = value_states_19_pad_type_0, strides = value_states_19_strides_0, weight = model_model_layers_7_self_attn_v_proj_weight_palettized, x = var_763)[name = string("value_states_19")]; tensor var_783 = const()[name = string("op_783"), val = tensor([1, 32, 128, 256])]; tensor var_784 = reshape(shape = var_783, x = query_states_13)[name = string("op_784")]; tensor var_785 = const()[name = string("op_785"), val = tensor([0, 1, 3, 2])]; tensor var_787 = const()[name = string("op_787"), val = tensor([1, 8, 128, 256])]; tensor var_788 = reshape(shape = var_787, x = key_states_19)[name = string("op_788")]; tensor var_789 = const()[name = string("op_789"), val = tensor([0, 1, 3, 2])]; tensor var_791 = const()[name = string("op_791"), val = tensor([1, 8, 128, 256])]; tensor var_792 = reshape(shape = var_791, x = value_states_19)[name = string("op_792")]; tensor var_793 = const()[name = string("op_793"), val = tensor([0, 1, 3, 2])]; tensor x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor([1, 32, 256, 64])]; tensor x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_85 = transpose(perm = var_785, x = var_784)[name = string("transpose_5")]; tensor x1_13 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = x_85)[name = string("x1_13")]; tensor x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor([1, 32, 256, 128])]; tensor x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_13 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = x_85)[name = string("x2_13")]; tensor var_811 = mul(x = x1_13, y = cos_7)[name = string("op_811")]; tensor var_812 = mul(x = x2_13, y = sin_7)[name = string("op_812")]; tensor var_813 = sub(x = var_811, y = var_812)[name = string("op_813")]; tensor var_814 = mul(x = x2_13, y = cos_7)[name = string("op_814")]; tensor var_815 = mul(x = x1_13, y = sin_7)[name = string("op_815")]; tensor var_816 = add(x = var_814, y = var_815)[name = string("op_816")]; bool rotated_13_interleave_0 = const()[name = string("rotated_13_interleave_0"), val = bool(false)]; tensor rotated_13 = concat(axis = var_33, interleave = rotated_13_interleave_0, values = (var_813, var_816))[name = string("rotated_13")]; tensor x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_end_0 = const()[name = string("x1_end_0"), val = tensor([1, 8, 256, 64])]; tensor x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_89 = transpose(perm = var_789, x = var_788)[name = string("transpose_4")]; tensor x1 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = x_89)[name = string("x1")]; tensor x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_end_0 = const()[name = string("x2_end_0"), val = tensor([1, 8, 256, 128])]; tensor x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = x_89)[name = string("x2")]; tensor var_832 = mul(x = x1, y = cos_7)[name = string("op_832")]; tensor var_833 = mul(x = x2, y = sin_7)[name = string("op_833")]; tensor var_834 = sub(x = var_832, y = var_833)[name = string("op_834")]; tensor var_835 = mul(x = x2, y = cos_7)[name = string("op_835")]; tensor var_836 = mul(x = x1, y = sin_7)[name = string("op_836")]; tensor var_837 = add(x = var_835, y = var_836)[name = string("op_837")]; bool rotated_interleave_0 = const()[name = string("rotated_interleave_0"), val = bool(false)]; tensor rotated = concat(axis = var_33, interleave = rotated_interleave_0, values = (var_834, var_837))[name = string("rotated")]; tensor expand_dims_36 = const()[name = string("expand_dims_36"), val = tensor([7])]; tensor expand_dims_37 = const()[name = string("expand_dims_37"), val = tensor([0])]; tensor expand_dims_39 = const()[name = string("expand_dims_39"), val = tensor([0])]; tensor expand_dims_40 = const()[name = string("expand_dims_40"), val = tensor([8])]; int32 concat_56_axis_0 = const()[name = string("concat_56_axis_0"), val = int32(0)]; bool concat_56_interleave_0 = const()[name = string("concat_56_interleave_0"), val = bool(false)]; tensor concat_56 = concat(axis = concat_56_axis_0, interleave = concat_56_interleave_0, values = (expand_dims_36, expand_dims_37, current_pos, expand_dims_39))[name = string("concat_56")]; tensor concat_57_values1_0 = const()[name = string("concat_57_values1_0"), val = tensor([0])]; tensor concat_57_values3_0 = const()[name = string("concat_57_values3_0"), val = tensor([0])]; int32 concat_57_axis_0 = const()[name = string("concat_57_axis_0"), val = int32(0)]; bool concat_57_interleave_0 = const()[name = string("concat_57_interleave_0"), val = bool(false)]; tensor concat_57 = concat(axis = concat_57_axis_0, interleave = concat_57_interleave_0, values = (expand_dims_40, concat_57_values1_0, var_255, concat_57_values3_0))[name = string("concat_57")]; tensor model_model_kv_cache_0_internal_tensor_assign_7_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_56, begin_mask = model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0, end = concat_57, end_mask = model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_7_stride_0, update = rotated, x = coreml_update_state_13)[name = string("model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_14_write_state")]; tensor coreml_update_state_14 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_14")]; tensor expand_dims_42 = const()[name = string("expand_dims_42"), val = tensor([39])]; tensor expand_dims_43 = const()[name = string("expand_dims_43"), val = tensor([0])]; tensor expand_dims_45 = const()[name = string("expand_dims_45"), val = tensor([0])]; tensor expand_dims_46 = const()[name = string("expand_dims_46"), val = tensor([40])]; int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)]; bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)]; tensor concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (expand_dims_42, expand_dims_43, current_pos, expand_dims_45))[name = string("concat_60")]; tensor concat_61_values1_0 = const()[name = string("concat_61_values1_0"), val = tensor([0])]; tensor concat_61_values3_0 = const()[name = string("concat_61_values3_0"), val = tensor([0])]; int32 concat_61_axis_0 = const()[name = string("concat_61_axis_0"), val = int32(0)]; bool concat_61_interleave_0 = const()[name = string("concat_61_interleave_0"), val = bool(false)]; tensor concat_61 = concat(axis = concat_61_axis_0, interleave = concat_61_interleave_0, values = (expand_dims_46, concat_61_values1_0, var_255, concat_61_values3_0))[name = string("concat_61")]; tensor model_model_kv_cache_0_internal_tensor_assign_8_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_21 = transpose(perm = var_793, x = var_792)[name = string("transpose_3")]; tensor model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_60, begin_mask = model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0, end = concat_61, end_mask = model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_8_stride_0, update = value_states_21, x = coreml_update_state_14)[name = string("model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_15_write_state")]; tensor coreml_update_state_15 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_15")]; tensor var_860_begin_0 = const()[name = string("op_860_begin_0"), val = tensor([7, 0, 0, 0])]; tensor var_860_end_0 = const()[name = string("op_860_end_0"), val = tensor([8, 8, 1024, 128])]; tensor var_860_end_mask_0 = const()[name = string("op_860_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_860_cast_fp16 = slice_by_index(begin = var_860_begin_0, end = var_860_end_0, end_mask = var_860_end_mask_0, x = coreml_update_state_15)[name = string("op_860_cast_fp16")]; tensor K_layer_cache_axes_0 = const()[name = string("K_layer_cache_axes_0"), val = tensor([0])]; tensor K_layer_cache_cast_fp16 = squeeze(axes = K_layer_cache_axes_0, x = var_860_cast_fp16)[name = string("K_layer_cache_cast_fp16")]; tensor var_862_begin_0 = const()[name = string("op_862_begin_0"), val = tensor([39, 0, 0, 0])]; tensor var_862_end_0 = const()[name = string("op_862_end_0"), val = tensor([40, 8, 1024, 128])]; tensor var_862_end_mask_0 = const()[name = string("op_862_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_862_cast_fp16 = slice_by_index(begin = var_862_begin_0, end = var_862_end_0, end_mask = var_862_end_mask_0, x = coreml_update_state_15)[name = string("op_862_cast_fp16")]; tensor V_layer_cache_axes_0 = const()[name = string("V_layer_cache_axes_0"), val = tensor([0])]; tensor V_layer_cache_cast_fp16 = squeeze(axes = V_layer_cache_axes_0, x = var_862_cast_fp16)[name = string("V_layer_cache_cast_fp16")]; tensor x_95_axes_0 = const()[name = string("x_95_axes_0"), val = tensor([1])]; tensor x_95_cast_fp16 = expand_dims(axes = x_95_axes_0, x = K_layer_cache_cast_fp16)[name = string("x_95_cast_fp16")]; tensor var_871 = const()[name = string("op_871"), val = tensor([1, 4, 1, 1])]; tensor x_97_cast_fp16 = tile(reps = var_871, x = x_95_cast_fp16)[name = string("x_97_cast_fp16")]; tensor var_875 = const()[name = string("op_875"), val = tensor([1, -1, 1024, 128])]; tensor var_876_cast_fp16 = reshape(shape = var_875, x = x_97_cast_fp16)[name = string("op_876_cast_fp16")]; tensor x_101_axes_0 = const()[name = string("x_101_axes_0"), val = tensor([1])]; tensor x_101_cast_fp16 = expand_dims(axes = x_101_axes_0, x = V_layer_cache_cast_fp16)[name = string("x_101_cast_fp16")]; tensor var_878 = const()[name = string("op_878"), val = tensor([1, 4, 1, 1])]; tensor x_103_cast_fp16 = tile(reps = var_878, x = x_101_cast_fp16)[name = string("x_103_cast_fp16")]; bool var_885_transpose_x_0 = const()[name = string("op_885_transpose_x_0"), val = bool(false)]; bool var_885_transpose_y_0 = const()[name = string("op_885_transpose_y_0"), val = bool(true)]; tensor var_885_cast_fp16 = matmul(transpose_x = var_885_transpose_x_0, transpose_y = var_885_transpose_y_0, x = rotated_13, y = var_876_cast_fp16)[name = string("op_885_cast_fp16")]; fp16 var_886_to_fp16 = const()[name = string("op_886_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_weights_cast_fp16 = mul(x = var_885_cast_fp16, y = var_886_to_fp16)[name = string("attn_weights_cast_fp16")]; tensor x_105_cast_fp16 = add(x = attn_weights_cast_fp16, y = causal_mask)[name = string("x_105_cast_fp16")]; tensor reduce_max_3_axes_0 = const()[name = string("reduce_max_3_axes_0"), val = tensor([-1])]; bool reduce_max_3_keep_dims_0 = const()[name = string("reduce_max_3_keep_dims_0"), val = bool(true)]; tensor reduce_max_3_cast_fp16 = reduce_max(axes = reduce_max_3_axes_0, keep_dims = reduce_max_3_keep_dims_0, x = x_105_cast_fp16)[name = string("reduce_max_3_cast_fp16")]; tensor x_107_cast_fp16 = sub(x = x_105_cast_fp16, y = reduce_max_3_cast_fp16)[name = string("x_107_cast_fp16")]; tensor exp_x_cast_fp16 = exp(x = x_107_cast_fp16)[name = string("exp_x_cast_fp16")]; tensor var_897_axes_0 = const()[name = string("op_897_axes_0"), val = tensor([-1])]; bool var_897_keep_dims_0 = const()[name = string("op_897_keep_dims_0"), val = bool(true)]; tensor var_897_cast_fp16 = reduce_sum(axes = var_897_axes_0, keep_dims = var_897_keep_dims_0, x = exp_x_cast_fp16)[name = string("op_897_cast_fp16")]; tensor var_898_cast_fp16 = real_div(x = exp_x_cast_fp16, y = var_897_cast_fp16)[name = string("op_898_cast_fp16")]; tensor concat_66 = const()[name = string("concat_66"), val = tensor([32, 256, 1024])]; tensor reshape_9_cast_fp16 = reshape(shape = concat_66, x = var_898_cast_fp16)[name = string("reshape_9_cast_fp16")]; tensor concat_67 = const()[name = string("concat_67"), val = tensor([32, 1024, 128])]; tensor reshape_10_cast_fp16 = reshape(shape = concat_67, x = x_103_cast_fp16)[name = string("reshape_10_cast_fp16")]; bool matmul_3_transpose_x_0 = const()[name = string("matmul_3_transpose_x_0"), val = bool(false)]; bool matmul_3_transpose_y_0 = const()[name = string("matmul_3_transpose_y_0"), val = bool(false)]; tensor matmul_3_cast_fp16 = matmul(transpose_x = matmul_3_transpose_x_0, transpose_y = matmul_3_transpose_y_0, x = reshape_9_cast_fp16, y = reshape_10_cast_fp16)[name = string("matmul_3_cast_fp16")]; tensor concat_71 = const()[name = string("concat_71"), val = tensor([1, 32, 256, 128])]; tensor reshape_11_cast_fp16 = reshape(shape = concat_71, x = matmul_3_cast_fp16)[name = string("reshape_11_cast_fp16")]; tensor var_901_perm_0 = const()[name = string("op_901_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_903 = const()[name = string("op_903"), val = tensor([1, 256, 4096])]; tensor var_901_cast_fp16 = transpose(perm = var_901_perm_0, x = reshape_11_cast_fp16)[name = string("transpose_2")]; tensor input_47_cast_fp16 = reshape(shape = var_903, x = var_901_cast_fp16)[name = string("input_47_cast_fp16")]; tensor model_model_layers_7_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(711594048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(724177024))))[name = string("model_model_layers_7_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_3_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_7_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_47_cast_fp16)[name = string("linear_3_cast_fp16")]; tensor hidden_states_29_cast_fp16 = add(x = hidden_states_25_cast_fp16, y = linear_3_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; tensor mean_axes_0 = const()[name = string("mean_axes_0"), val = tensor([-1])]; bool mean_keep_dims_0 = const()[name = string("mean_keep_dims_0"), val = bool(true)]; tensor mean_cast_fp16 = reduce_mean(axes = mean_axes_0, keep_dims = mean_keep_dims_0, x = hidden_states_29_cast_fp16)[name = string("mean_cast_fp16")]; tensor input_49_cast_fp16 = sub(x = hidden_states_29_cast_fp16, y = mean_cast_fp16)[name = string("input_49_cast_fp16")]; tensor var_914_axes_0 = const()[name = string("op_914_axes_0"), val = tensor([-1])]; tensor model_model_layers_7_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_7_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(724242624)))]; tensor var_914_cast_fp16 = layer_norm(axes = var_914_axes_0, epsilon = var_35_to_fp16, gamma = model_model_layers_7_post_attention_layernorm_weight_to_fp16, x = input_49_cast_fp16)[name = string("op_914_cast_fp16")]; tensor var_921 = const()[name = string("op_921"), val = tensor([0, 2, 1])]; tensor input_51_axes_0 = const()[name = string("input_51_axes_0"), val = tensor([2])]; tensor var_922 = transpose(perm = var_921, x = var_914_cast_fp16)[name = string("transpose_1")]; tensor input_51 = expand_dims(axes = input_51_axes_0, x = var_922)[name = string("input_51")]; string input_53_pad_type_0 = const()[name = string("input_53_pad_type_0"), val = string("valid")]; tensor input_53_strides_0 = const()[name = string("input_53_strides_0"), val = tensor([1, 1])]; tensor input_53_pad_0 = const()[name = string("input_53_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_53_dilations_0 = const()[name = string("input_53_dilations_0"), val = tensor([1, 1])]; int32 input_53_groups_0 = const()[name = string("input_53_groups_0"), val = int32(1)]; tensor input_53 = conv(dilations = input_53_dilations_0, groups = input_53_groups_0, pad = input_53_pad_0, pad_type = input_53_pad_type_0, strides = input_53_strides_0, weight = model_model_layers_7_mlp_gate_proj_weight_palettized, x = input_51)[name = string("input_53")]; string up_states_pad_type_0 = const()[name = string("up_states_pad_type_0"), val = string("valid")]; tensor up_states_strides_0 = const()[name = string("up_states_strides_0"), val = tensor([1, 1])]; tensor up_states_pad_0 = const()[name = string("up_states_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_dilations_0 = const()[name = string("up_states_dilations_0"), val = tensor([1, 1])]; int32 up_states_groups_0 = const()[name = string("up_states_groups_0"), val = int32(1)]; tensor up_states = conv(dilations = up_states_dilations_0, groups = up_states_groups_0, pad = up_states_pad_0, pad_type = up_states_pad_type_0, strides = up_states_strides_0, weight = model_model_layers_7_mlp_up_proj_weight_palettized, x = input_51)[name = string("up_states")]; tensor gate_states = silu(x = input_53)[name = string("gate_states")]; tensor input = mul(x = gate_states, y = up_states)[name = string("input")]; string hidden_states_pad_type_0 = const()[name = string("hidden_states_pad_type_0"), val = string("valid")]; tensor hidden_states_strides_0 = const()[name = string("hidden_states_strides_0"), val = tensor([1, 1])]; tensor hidden_states_pad_0 = const()[name = string("hidden_states_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_dilations_0 = const()[name = string("hidden_states_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_groups_0 = const()[name = string("hidden_states_groups_0"), val = int32(1)]; tensor hidden_states_1 = conv(dilations = hidden_states_dilations_0, groups = hidden_states_groups_0, pad = hidden_states_pad_0, pad_type = hidden_states_pad_type_0, strides = hidden_states_strides_0, weight = model_model_layers_7_mlp_down_proj_weight_palettized, x = input)[name = string("hidden_states")]; tensor var_944_axes_0 = const()[name = string("op_944_axes_0"), val = tensor([2])]; tensor var_944 = squeeze(axes = var_944_axes_0, x = hidden_states_1)[name = string("op_944")]; tensor var_945 = const()[name = string("op_945"), val = tensor([0, 2, 1])]; tensor var_946 = transpose(perm = var_945, x = var_944)[name = string("transpose_0")]; tensor output_hidden_states = add(x = hidden_states_29_cast_fp16, y = var_946)[name = string("op_947_cast_fp16")]; } -> (output_hidden_states); }