diff --git "a/llama_FFN_PF_lut4_chunk_01of02.mlmodelc/model.mil" "b/llama_FFN_PF_lut4_chunk_01of02.mlmodelc/model.mil" new file mode 100644--- /dev/null +++ "b/llama_FFN_PF_lut4_chunk_01of02.mlmodelc/model.mil" @@ -0,0 +1,3295 @@ +program(1.3) +[buildInfo = dict({{"coremlc-component-MIL", "3402.3.2"}, {"coremlc-version", "3402.4.1"}})] +{ + func infer(tensor causal_mask, tensor current_pos, tensor hidden_states, state> model_model_kv_cache_0, tensor position_ids) { + tensor model_model_layers_0_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2097280))))[name = string("model_model_layers_0_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_0_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2105536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2629888))))[name = string("model_model_layers_0_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_0_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2632000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3156352))))[name = string("model_model_layers_0_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_0_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3158464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11547136))))[name = string("model_model_layers_0_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_0_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11579968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19968640))))[name = string("model_model_layers_0_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_0_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20001472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28390144))))[name = string("model_model_layers_0_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_1_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28398400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30495616))))[name = string("model_model_layers_1_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_1_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30503872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31028224))))[name = string("model_model_layers_1_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_1_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31030336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31554688))))[name = string("model_model_layers_1_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_1_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31556800))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39945472))))[name = string("model_model_layers_1_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_1_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39978304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48366976))))[name = string("model_model_layers_1_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_1_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48399808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56788480))))[name = string("model_model_layers_1_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_2_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56796736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58893952))))[name = string("model_model_layers_2_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_2_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58902208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59426560))))[name = string("model_model_layers_2_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_2_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59428672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59953024))))[name = string("model_model_layers_2_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_2_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59955136))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68343808))))[name = string("model_model_layers_2_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_2_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68376640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76765312))))[name = string("model_model_layers_2_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_2_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76798144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85186816))))[name = string("model_model_layers_2_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_3_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85195072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87292288))))[name = string("model_model_layers_3_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_3_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87300544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87824896))))[name = string("model_model_layers_3_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_3_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87827008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88351360))))[name = string("model_model_layers_3_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_3_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88353472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96742144))))[name = string("model_model_layers_3_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_3_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96774976))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105163648))))[name = string("model_model_layers_3_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_3_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105196480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113585152))))[name = string("model_model_layers_3_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_4_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113593408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115690624))))[name = string("model_model_layers_4_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_4_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115698880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116223232))))[name = string("model_model_layers_4_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_4_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116225344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116749696))))[name = string("model_model_layers_4_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_4_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116751808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125140480))))[name = string("model_model_layers_4_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_4_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125173312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133561984))))[name = string("model_model_layers_4_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_4_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133594816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141983488))))[name = string("model_model_layers_4_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_5_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141991744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144088960))))[name = string("model_model_layers_5_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_5_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144097216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144621568))))[name = string("model_model_layers_5_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_5_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144623680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145148032))))[name = string("model_model_layers_5_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_5_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145150144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153538816))))[name = string("model_model_layers_5_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_5_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153571648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161960320))))[name = string("model_model_layers_5_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_5_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161993152))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170381824))))[name = string("model_model_layers_5_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_6_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170390080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(172487296))))[name = string("model_model_layers_6_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_6_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(172495552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173019904))))[name = string("model_model_layers_6_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_6_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173022016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173546368))))[name = string("model_model_layers_6_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_6_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173548480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(181937152))))[name = string("model_model_layers_6_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_6_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(181969984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190358656))))[name = string("model_model_layers_6_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_6_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190391488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198780160))))[name = string("model_model_layers_6_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_7_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198788416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200885632))))[name = string("model_model_layers_7_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_7_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200893888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201418240))))[name = string("model_model_layers_7_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_7_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201420352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201944704))))[name = string("model_model_layers_7_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_7_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201946816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210335488))))[name = string("model_model_layers_7_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_7_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210368320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218756992))))[name = string("model_model_layers_7_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_7_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218789824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227178496))))[name = string("model_model_layers_7_mlp_down_proj_weight_palettized")]; + int32 var_50 = const()[name = string("op_50"), val = int32(-1)]; + int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; + tensor greater_equal_0 = greater_equal(x = current_pos, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; + int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(131072)]; + tensor add_0 = add(x = current_pos, y = slice_by_index_0)[name = string("add_0")]; + tensor select_0 = select(a = current_pos, b = add_0, cond = greater_equal_0)[name = string("select_0")]; + int32 var_259_axis_0 = const()[name = string("op_259_axis_0"), val = int32(1)]; + int32 var_259_batch_dims_0 = const()[name = string("op_259_batch_dims_0"), val = int32(0)]; + bool var_259_validate_indices_0 = const()[name = string("op_259_validate_indices_0"), val = bool(false)]; + tensor var_55_to_fp16 = const()[name = string("op_55_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227186752)))]; + tensor var_259_cast_fp16 = gather(axis = var_259_axis_0, batch_dims = var_259_batch_dims_0, indices = select_0, validate_indices = var_259_validate_indices_0, x = var_55_to_fp16)[name = string("op_259_cast_fp16")]; + tensor var_260 = const()[name = string("op_260"), val = tensor([1, 1, 1, -1])]; + tensor sin_1_cast_fp16 = reshape(shape = var_260, x = var_259_cast_fp16)[name = string("sin_1_cast_fp16")]; + int32 var_264_axis_0 = const()[name = string("op_264_axis_0"), val = int32(1)]; + int32 var_264_batch_dims_0 = const()[name = string("op_264_batch_dims_0"), val = int32(0)]; + bool var_264_validate_indices_0 = const()[name = string("op_264_validate_indices_0"), val = bool(false)]; + tensor var_49_to_fp16 = const()[name = string("op_49_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243964032)))]; + tensor var_264_cast_fp16 = gather(axis = var_264_axis_0, batch_dims = var_264_batch_dims_0, indices = select_0, validate_indices = var_264_validate_indices_0, x = var_49_to_fp16)[name = string("op_264_cast_fp16")]; + tensor var_265 = const()[name = string("op_265"), val = tensor([1, 1, 1, -1])]; + tensor cos_1_cast_fp16 = reshape(shape = var_265, x = var_264_cast_fp16)[name = string("cos_1_cast_fp16")]; + tensor mean_1_axes_0 = const()[name = string("mean_1_axes_0"), val = tensor([-1])]; + bool mean_1_keep_dims_0 = const()[name = string("mean_1_keep_dims_0"), val = bool(true)]; + tensor mean_1_cast_fp16 = reduce_mean(axes = mean_1_axes_0, keep_dims = mean_1_keep_dims_0, x = hidden_states)[name = string("mean_1_cast_fp16")]; + tensor input_1_cast_fp16 = sub(x = hidden_states, y = mean_1_cast_fp16)[name = string("input_1_cast_fp16")]; + tensor var_273_axes_0 = const()[name = string("op_273_axes_0"), val = tensor([-1])]; + tensor model_model_layers_0_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_0_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(260741312)))]; + fp16 var_45_to_fp16 = const()[name = string("op_45_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_273_cast_fp16 = layer_norm(axes = var_273_axes_0, epsilon = var_45_to_fp16, gamma = model_model_layers_0_input_layernorm_weight_to_fp16, x = input_1_cast_fp16)[name = string("op_273_cast_fp16")]; + tensor var_276 = const()[name = string("op_276"), val = tensor([0, 2, 1])]; + tensor var_278_axes_0 = const()[name = string("op_278_axes_0"), val = tensor([2])]; + tensor var_277 = transpose(perm = var_276, x = var_273_cast_fp16)[name = string("transpose_31")]; + tensor var_278 = expand_dims(axes = var_278_axes_0, x = var_277)[name = string("op_278")]; + string var_285_pad_type_0 = const()[name = string("op_285_pad_type_0"), val = string("valid")]; + tensor var_285_strides_0 = const()[name = string("op_285_strides_0"), val = tensor([1, 1])]; + tensor var_285_pad_0 = const()[name = string("op_285_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_285_dilations_0 = const()[name = string("op_285_dilations_0"), val = tensor([1, 1])]; + int32 var_285_groups_0 = const()[name = string("op_285_groups_0"), val = int32(1)]; + tensor var_285 = conv(dilations = var_285_dilations_0, groups = var_285_groups_0, pad = var_285_pad_0, pad_type = var_285_pad_type_0, strides = var_285_strides_0, weight = model_model_layers_0_self_attn_q_proj_weight_palettized, x = var_278)[name = string("op_285")]; + tensor var_286 = const()[name = string("op_286"), val = tensor([1, 32, 1, 64])]; + tensor var_287 = reshape(shape = var_286, x = var_285)[name = string("op_287")]; + string var_294_pad_type_0 = const()[name = string("op_294_pad_type_0"), val = string("valid")]; + tensor var_294_strides_0 = const()[name = string("op_294_strides_0"), val = tensor([1, 1])]; + tensor var_294_pad_0 = const()[name = string("op_294_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_294_dilations_0 = const()[name = string("op_294_dilations_0"), val = tensor([1, 1])]; + int32 var_294_groups_0 = const()[name = string("op_294_groups_0"), val = int32(1)]; + tensor var_294 = conv(dilations = var_294_dilations_0, groups = var_294_groups_0, pad = var_294_pad_0, pad_type = var_294_pad_type_0, strides = var_294_strides_0, weight = model_model_layers_0_self_attn_k_proj_weight_palettized, x = var_278)[name = string("op_294")]; + tensor var_295 = const()[name = string("op_295"), val = tensor([1, 8, 1, 64])]; + tensor var_296 = reshape(shape = var_295, x = var_294)[name = string("op_296")]; + string var_303_pad_type_0 = const()[name = string("op_303_pad_type_0"), val = string("valid")]; + tensor var_303_strides_0 = const()[name = string("op_303_strides_0"), val = tensor([1, 1])]; + tensor var_303_pad_0 = const()[name = string("op_303_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_303_dilations_0 = const()[name = string("op_303_dilations_0"), val = tensor([1, 1])]; + int32 var_303_groups_0 = const()[name = string("op_303_groups_0"), val = int32(1)]; + tensor var_303 = conv(dilations = var_303_dilations_0, groups = var_303_groups_0, pad = var_303_pad_0, pad_type = var_303_pad_type_0, strides = var_303_strides_0, weight = model_model_layers_0_self_attn_v_proj_weight_palettized, x = var_278)[name = string("op_303")]; + tensor var_304 = const()[name = string("op_304"), val = tensor([1, 8, 1, 64])]; + tensor var_305 = reshape(shape = var_304, x = var_303)[name = string("op_305")]; + tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 32, 1, 32])]; + tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_1 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = var_287)[name = string("x1_1")]; + tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 32, 1, 64])]; + tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_1 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = var_287)[name = string("x2_1")]; + tensor cos_3_begin_0 = const()[name = string("cos_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor cos_3_end_0 = const()[name = string("cos_3_end_0"), val = tensor([1, 1, 1, 32])]; + tensor cos_3_end_mask_0 = const()[name = string("cos_3_end_mask_0"), val = tensor([true, true, true, false])]; + tensor cos_3_cast_fp16 = slice_by_index(begin = cos_3_begin_0, end = cos_3_end_0, end_mask = cos_3_end_mask_0, x = cos_1_cast_fp16)[name = string("cos_3_cast_fp16")]; + tensor sin_3_begin_0 = const()[name = string("sin_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor sin_3_end_0 = const()[name = string("sin_3_end_0"), val = tensor([1, 1, 1, 32])]; + tensor sin_3_end_mask_0 = const()[name = string("sin_3_end_mask_0"), val = tensor([true, true, true, false])]; + tensor sin_3_cast_fp16 = slice_by_index(begin = sin_3_begin_0, end = sin_3_end_0, end_mask = sin_3_end_mask_0, x = sin_1_cast_fp16)[name = string("sin_3_cast_fp16")]; + tensor var_319_cast_fp16 = mul(x = x1_1, y = cos_3_cast_fp16)[name = string("op_319_cast_fp16")]; + tensor var_320_cast_fp16 = mul(x = x2_1, y = sin_3_cast_fp16)[name = string("op_320_cast_fp16")]; + tensor var_321_cast_fp16 = sub(x = var_319_cast_fp16, y = var_320_cast_fp16)[name = string("op_321_cast_fp16")]; + tensor var_322_cast_fp16 = mul(x = x2_1, y = cos_3_cast_fp16)[name = string("op_322_cast_fp16")]; + tensor var_323_cast_fp16 = mul(x = x1_1, y = sin_3_cast_fp16)[name = string("op_323_cast_fp16")]; + tensor var_324_cast_fp16 = add(x = var_322_cast_fp16, y = var_323_cast_fp16)[name = string("op_324_cast_fp16")]; + bool rotated_1_interleave_0 = const()[name = string("rotated_1_interleave_0"), val = bool(false)]; + tensor rotated_1_cast_fp16 = concat(axis = var_50, interleave = rotated_1_interleave_0, values = (var_321_cast_fp16, var_324_cast_fp16))[name = string("rotated_1_cast_fp16")]; + tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 8, 1, 32])]; + tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_3 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = var_296)[name = string("x1_3")]; + tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_3 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = var_296)[name = string("x2_3")]; + tensor var_340_cast_fp16 = mul(x = x1_3, y = cos_3_cast_fp16)[name = string("op_340_cast_fp16")]; + tensor var_341_cast_fp16 = mul(x = x2_3, y = sin_3_cast_fp16)[name = string("op_341_cast_fp16")]; + tensor var_342_cast_fp16 = sub(x = var_340_cast_fp16, y = var_341_cast_fp16)[name = string("op_342_cast_fp16")]; + tensor var_343_cast_fp16 = mul(x = x2_3, y = cos_3_cast_fp16)[name = string("op_343_cast_fp16")]; + tensor var_344_cast_fp16 = mul(x = x1_3, y = sin_3_cast_fp16)[name = string("op_344_cast_fp16")]; + tensor var_345_cast_fp16 = add(x = var_343_cast_fp16, y = var_344_cast_fp16)[name = string("op_345_cast_fp16")]; + bool rotated_3_interleave_0 = const()[name = string("rotated_3_interleave_0"), val = bool(false)]; + tensor rotated_3_cast_fp16 = concat(axis = var_50, interleave = rotated_3_interleave_0, values = (var_342_cast_fp16, var_345_cast_fp16))[name = string("rotated_3_cast_fp16")]; + int32 var_349 = const()[name = string("op_349"), val = int32(1)]; + tensor var_350 = add(x = current_pos, y = var_349)[name = string("op_350")]; + tensor read_state_0 = read_state(input = model_model_kv_cache_0)[name = string("read_state_0")]; + tensor expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor([0])]; + tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor([0])]; + tensor expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor([0])]; + tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor([1])]; + int32 concat_2_axis_0 = const()[name = string("concat_2_axis_0"), val = int32(0)]; + bool concat_2_interleave_0 = const()[name = string("concat_2_interleave_0"), val = bool(false)]; + tensor concat_2 = concat(axis = concat_2_axis_0, interleave = concat_2_interleave_0, values = (expand_dims_0, expand_dims_1, current_pos, expand_dims_3))[name = string("concat_2")]; + tensor concat_3_values1_0 = const()[name = string("concat_3_values1_0"), val = tensor([0])]; + tensor concat_3_values3_0 = const()[name = string("concat_3_values3_0"), val = tensor([0])]; + int32 concat_3_axis_0 = const()[name = string("concat_3_axis_0"), val = int32(0)]; + bool concat_3_interleave_0 = const()[name = string("concat_3_interleave_0"), val = bool(false)]; + tensor concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (expand_dims_4, concat_3_values1_0, var_350, concat_3_values3_0))[name = string("concat_3")]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_1_stride_0, update = rotated_3_cast_fp16, x = read_state_0)[name = string("model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_0_write_state")]; + tensor coreml_update_state_16 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_0")]; + tensor expand_dims_6 = const()[name = string("expand_dims_6"), val = tensor([16])]; + tensor expand_dims_7 = const()[name = string("expand_dims_7"), val = tensor([0])]; + tensor expand_dims_9 = const()[name = string("expand_dims_9"), val = tensor([0])]; + tensor expand_dims_10 = const()[name = string("expand_dims_10"), val = tensor([17])]; + int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)]; + bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)]; + tensor concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (expand_dims_6, expand_dims_7, current_pos, expand_dims_9))[name = string("concat_6")]; + tensor concat_7_values1_0 = const()[name = string("concat_7_values1_0"), val = tensor([0])]; + tensor concat_7_values3_0 = const()[name = string("concat_7_values3_0"), val = tensor([0])]; + int32 concat_7_axis_0 = const()[name = string("concat_7_axis_0"), val = int32(0)]; + bool concat_7_interleave_0 = const()[name = string("concat_7_interleave_0"), val = bool(false)]; + tensor concat_7 = concat(axis = concat_7_axis_0, interleave = concat_7_interleave_0, values = (expand_dims_10, concat_7_values1_0, var_350, concat_7_values3_0))[name = string("concat_7")]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_6, begin_mask = model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0, end = concat_7, end_mask = model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_2_stride_0, update = var_305, x = coreml_update_state_16)[name = string("model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_1_write_state")]; + tensor coreml_update_state_17 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_1")]; + tensor var_365_begin_0 = const()[name = string("op_365_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_365_end_0 = const()[name = string("op_365_end_0"), val = tensor([1, 8, 1546, 64])]; + tensor var_365_end_mask_0 = const()[name = string("op_365_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_365_cast_fp16 = slice_by_index(begin = var_365_begin_0, end = var_365_end_0, end_mask = var_365_end_mask_0, x = coreml_update_state_17)[name = string("op_365_cast_fp16")]; + tensor K_layer_cache_1_axes_0 = const()[name = string("K_layer_cache_1_axes_0"), val = tensor([0])]; + tensor K_layer_cache_1_cast_fp16 = squeeze(axes = K_layer_cache_1_axes_0, x = var_365_cast_fp16)[name = string("K_layer_cache_1_cast_fp16")]; + tensor var_367_begin_0 = const()[name = string("op_367_begin_0"), val = tensor([16, 0, 0, 0])]; + tensor var_367_end_0 = const()[name = string("op_367_end_0"), val = tensor([17, 8, 1546, 64])]; + tensor var_367_end_mask_0 = const()[name = string("op_367_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_367_cast_fp16 = slice_by_index(begin = var_367_begin_0, end = var_367_end_0, end_mask = var_367_end_mask_0, x = coreml_update_state_17)[name = string("op_367_cast_fp16")]; + tensor V_layer_cache_1_axes_0 = const()[name = string("V_layer_cache_1_axes_0"), val = tensor([0])]; + tensor V_layer_cache_1_cast_fp16 = squeeze(axes = V_layer_cache_1_axes_0, x = var_367_cast_fp16)[name = string("V_layer_cache_1_cast_fp16")]; + tensor x_11_axes_0 = const()[name = string("x_11_axes_0"), val = tensor([1])]; + tensor x_11_cast_fp16 = expand_dims(axes = x_11_axes_0, x = K_layer_cache_1_cast_fp16)[name = string("x_11_cast_fp16")]; + tensor var_376 = const()[name = string("op_376"), val = tensor([1, 4, 1, 1])]; + tensor x_13_cast_fp16 = tile(reps = var_376, x = x_11_cast_fp16)[name = string("x_13_cast_fp16")]; + tensor var_380 = const()[name = string("op_380"), val = tensor([1, -1, 1546, 64])]; + tensor key_states_3_cast_fp16 = reshape(shape = var_380, x = x_13_cast_fp16)[name = string("key_states_3_cast_fp16")]; + tensor x_17_axes_0 = const()[name = string("x_17_axes_0"), val = tensor([1])]; + tensor x_17_cast_fp16 = expand_dims(axes = x_17_axes_0, x = V_layer_cache_1_cast_fp16)[name = string("x_17_cast_fp16")]; + tensor var_383 = const()[name = string("op_383"), val = tensor([1, 4, 1, 1])]; + tensor x_19_cast_fp16 = tile(reps = var_383, x = x_17_cast_fp16)[name = string("x_19_cast_fp16")]; + tensor var_387 = const()[name = string("op_387"), val = tensor([1, -1, 1546, 64])]; + tensor value_states_3_cast_fp16 = reshape(shape = var_387, x = x_19_cast_fp16)[name = string("value_states_3_cast_fp16")]; + bool var_390_transpose_x_1 = const()[name = string("op_390_transpose_x_1"), val = bool(false)]; + bool var_390_transpose_y_1 = const()[name = string("op_390_transpose_y_1"), val = bool(true)]; + tensor var_390_cast_fp16 = matmul(transpose_x = var_390_transpose_x_1, transpose_y = var_390_transpose_y_1, x = rotated_1_cast_fp16, y = key_states_3_cast_fp16)[name = string("op_390_cast_fp16")]; + fp16 var_391_to_fp16 = const()[name = string("op_391_to_fp16"), val = fp16(0x1p-3)]; + tensor attn_weights_1_cast_fp16 = mul(x = var_390_cast_fp16, y = var_391_to_fp16)[name = string("attn_weights_1_cast_fp16")]; + tensor x_21_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = causal_mask)[name = string("x_21_cast_fp16")]; + tensor reduce_max_0_axes_0 = const()[name = string("reduce_max_0_axes_0"), val = tensor([-1])]; + bool reduce_max_0_keep_dims_0 = const()[name = string("reduce_max_0_keep_dims_0"), val = bool(true)]; + tensor reduce_max_0_cast_fp16 = reduce_max(axes = reduce_max_0_axes_0, keep_dims = reduce_max_0_keep_dims_0, x = x_21_cast_fp16)[name = string("reduce_max_0_cast_fp16")]; + tensor x_23_cast_fp16 = sub(x = x_21_cast_fp16, y = reduce_max_0_cast_fp16)[name = string("x_23_cast_fp16")]; + tensor exp_x_1_cast_fp16 = exp(x = x_23_cast_fp16)[name = string("exp_x_1_cast_fp16")]; + tensor var_402_axes_0 = const()[name = string("op_402_axes_0"), val = tensor([-1])]; + bool var_402_keep_dims_0 = const()[name = string("op_402_keep_dims_0"), val = bool(true)]; + tensor var_402_cast_fp16 = reduce_sum(axes = var_402_axes_0, keep_dims = var_402_keep_dims_0, x = exp_x_1_cast_fp16)[name = string("op_402_cast_fp16")]; + tensor attn_weights_3_cast_fp16 = real_div(x = exp_x_1_cast_fp16, y = var_402_cast_fp16)[name = string("attn_weights_3_cast_fp16")]; + bool attn_output_1_transpose_x_0 = const()[name = string("attn_output_1_transpose_x_0"), val = bool(false)]; + bool attn_output_1_transpose_y_0 = const()[name = string("attn_output_1_transpose_y_0"), val = bool(false)]; + tensor attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_0, transpose_y = attn_output_1_transpose_y_0, x = attn_weights_3_cast_fp16, y = value_states_3_cast_fp16)[name = string("attn_output_1_cast_fp16")]; + tensor var_405_perm_0 = const()[name = string("op_405_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_407 = const()[name = string("op_407"), val = tensor([1, 1, 2048])]; + tensor var_405_cast_fp16 = transpose(perm = var_405_perm_0, x = attn_output_1_cast_fp16)[name = string("transpose_30")]; + tensor input_5_cast_fp16 = reshape(shape = var_407, x = var_405_cast_fp16)[name = string("input_5_cast_fp16")]; + tensor model_model_layers_0_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(260745472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262842688))))[name = string("model_model_layers_0_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262850944)))]; + tensor linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_0_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_5_cast_fp16)[name = string("linear_0_cast_fp16")]; + tensor hidden_states_5_cast_fp16 = add(x = hidden_states, y = linear_0_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; + tensor mean_3_axes_0 = const()[name = string("mean_3_axes_0"), val = tensor([-1])]; + bool mean_3_keep_dims_0 = const()[name = string("mean_3_keep_dims_0"), val = bool(true)]; + tensor mean_3_cast_fp16 = reduce_mean(axes = mean_3_axes_0, keep_dims = mean_3_keep_dims_0, x = hidden_states_5_cast_fp16)[name = string("mean_3_cast_fp16")]; + tensor input_7_cast_fp16 = sub(x = hidden_states_5_cast_fp16, y = mean_3_cast_fp16)[name = string("input_7_cast_fp16")]; + tensor var_418_axes_0 = const()[name = string("op_418_axes_0"), val = tensor([-1])]; + tensor model_model_layers_0_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_0_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262855104)))]; + tensor var_418_cast_fp16 = layer_norm(axes = var_418_axes_0, epsilon = var_45_to_fp16, gamma = model_model_layers_0_post_attention_layernorm_weight_to_fp16, x = input_7_cast_fp16)[name = string("op_418_cast_fp16")]; + tensor var_425 = const()[name = string("op_425"), val = tensor([0, 2, 1])]; + tensor input_9_axes_0 = const()[name = string("input_9_axes_0"), val = tensor([2])]; + tensor var_426 = transpose(perm = var_425, x = var_418_cast_fp16)[name = string("transpose_29")]; + tensor input_9 = expand_dims(axes = input_9_axes_0, x = var_426)[name = string("input_9")]; + string input_11_pad_type_0 = const()[name = string("input_11_pad_type_0"), val = string("valid")]; + tensor input_11_strides_0 = const()[name = string("input_11_strides_0"), val = tensor([1, 1])]; + tensor input_11_pad_0 = const()[name = string("input_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_11_dilations_0 = const()[name = string("input_11_dilations_0"), val = tensor([1, 1])]; + int32 input_11_groups_0 = const()[name = string("input_11_groups_0"), val = int32(1)]; + tensor input_11 = conv(dilations = input_11_dilations_0, groups = input_11_groups_0, pad = input_11_pad_0, pad_type = input_11_pad_type_0, strides = input_11_strides_0, weight = model_model_layers_0_mlp_gate_proj_weight_palettized, x = input_9)[name = string("input_11")]; + string up_states_1_pad_type_0 = const()[name = string("up_states_1_pad_type_0"), val = string("valid")]; + tensor up_states_1_strides_0 = const()[name = string("up_states_1_strides_0"), val = tensor([1, 1])]; + tensor up_states_1_pad_0 = const()[name = string("up_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_1_dilations_0 = const()[name = string("up_states_1_dilations_0"), val = tensor([1, 1])]; + int32 up_states_1_groups_0 = const()[name = string("up_states_1_groups_0"), val = int32(1)]; + tensor up_states_1 = conv(dilations = up_states_1_dilations_0, groups = up_states_1_groups_0, pad = up_states_1_pad_0, pad_type = up_states_1_pad_type_0, strides = up_states_1_strides_0, weight = model_model_layers_0_mlp_up_proj_weight_palettized, x = input_9)[name = string("up_states_1")]; + tensor gate_states_1 = silu(x = input_11)[name = string("gate_states_1")]; + tensor input_13 = mul(x = gate_states_1, y = up_states_1)[name = string("input_13")]; + string hidden_states_7_pad_type_0 = const()[name = string("hidden_states_7_pad_type_0"), val = string("valid")]; + tensor hidden_states_7_strides_0 = const()[name = string("hidden_states_7_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_7_pad_0 = const()[name = string("hidden_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_7_dilations_0 = const()[name = string("hidden_states_7_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_7_groups_0 = const()[name = string("hidden_states_7_groups_0"), val = int32(1)]; + tensor hidden_states_7 = conv(dilations = hidden_states_7_dilations_0, groups = hidden_states_7_groups_0, pad = hidden_states_7_pad_0, pad_type = hidden_states_7_pad_type_0, strides = hidden_states_7_strides_0, weight = model_model_layers_0_mlp_down_proj_weight_palettized, x = input_13)[name = string("hidden_states_7")]; + tensor var_448_axes_0 = const()[name = string("op_448_axes_0"), val = tensor([2])]; + tensor var_448 = squeeze(axes = var_448_axes_0, x = hidden_states_7)[name = string("op_448")]; + tensor var_449 = const()[name = string("op_449"), val = tensor([0, 2, 1])]; + tensor var_450 = transpose(perm = var_449, x = var_448)[name = string("transpose_28")]; + tensor hidden_states_9_cast_fp16 = add(x = hidden_states_5_cast_fp16, y = var_450)[name = string("hidden_states_9_cast_fp16")]; + tensor mean_5_axes_0 = const()[name = string("mean_5_axes_0"), val = tensor([-1])]; + bool mean_5_keep_dims_0 = const()[name = string("mean_5_keep_dims_0"), val = bool(true)]; + tensor mean_5_cast_fp16 = reduce_mean(axes = mean_5_axes_0, keep_dims = mean_5_keep_dims_0, x = hidden_states_9_cast_fp16)[name = string("mean_5_cast_fp16")]; + tensor input_15_cast_fp16 = sub(x = hidden_states_9_cast_fp16, y = mean_5_cast_fp16)[name = string("input_15_cast_fp16")]; + tensor var_458_axes_0 = const()[name = string("op_458_axes_0"), val = tensor([-1])]; + tensor model_model_layers_1_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_1_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262859264)))]; + tensor var_458_cast_fp16 = layer_norm(axes = var_458_axes_0, epsilon = var_45_to_fp16, gamma = model_model_layers_1_input_layernorm_weight_to_fp16, x = input_15_cast_fp16)[name = string("op_458_cast_fp16")]; + tensor var_461 = const()[name = string("op_461"), val = tensor([0, 2, 1])]; + tensor var_463_axes_0 = const()[name = string("op_463_axes_0"), val = tensor([2])]; + tensor var_462 = transpose(perm = var_461, x = var_458_cast_fp16)[name = string("transpose_27")]; + tensor var_463 = expand_dims(axes = var_463_axes_0, x = var_462)[name = string("op_463")]; + string var_470_pad_type_0 = const()[name = string("op_470_pad_type_0"), val = string("valid")]; + tensor var_470_strides_0 = const()[name = string("op_470_strides_0"), val = tensor([1, 1])]; + tensor var_470_pad_0 = const()[name = string("op_470_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_470_dilations_0 = const()[name = string("op_470_dilations_0"), val = tensor([1, 1])]; + int32 var_470_groups_0 = const()[name = string("op_470_groups_0"), val = int32(1)]; + tensor var_470 = conv(dilations = var_470_dilations_0, groups = var_470_groups_0, pad = var_470_pad_0, pad_type = var_470_pad_type_0, strides = var_470_strides_0, weight = model_model_layers_1_self_attn_q_proj_weight_palettized, x = var_463)[name = string("op_470")]; + tensor var_471 = const()[name = string("op_471"), val = tensor([1, 32, 1, 64])]; + tensor var_472 = reshape(shape = var_471, x = var_470)[name = string("op_472")]; + string var_479_pad_type_0 = const()[name = string("op_479_pad_type_0"), val = string("valid")]; + tensor var_479_strides_0 = const()[name = string("op_479_strides_0"), val = tensor([1, 1])]; + tensor var_479_pad_0 = const()[name = string("op_479_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_479_dilations_0 = const()[name = string("op_479_dilations_0"), val = tensor([1, 1])]; + int32 var_479_groups_0 = const()[name = string("op_479_groups_0"), val = int32(1)]; + tensor var_479 = conv(dilations = var_479_dilations_0, groups = var_479_groups_0, pad = var_479_pad_0, pad_type = var_479_pad_type_0, strides = var_479_strides_0, weight = model_model_layers_1_self_attn_k_proj_weight_palettized, x = var_463)[name = string("op_479")]; + tensor var_480 = const()[name = string("op_480"), val = tensor([1, 8, 1, 64])]; + tensor var_481 = reshape(shape = var_480, x = var_479)[name = string("op_481")]; + string var_488_pad_type_0 = const()[name = string("op_488_pad_type_0"), val = string("valid")]; + tensor var_488_strides_0 = const()[name = string("op_488_strides_0"), val = tensor([1, 1])]; + tensor var_488_pad_0 = const()[name = string("op_488_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_488_dilations_0 = const()[name = string("op_488_dilations_0"), val = tensor([1, 1])]; + int32 var_488_groups_0 = const()[name = string("op_488_groups_0"), val = int32(1)]; + tensor var_488 = conv(dilations = var_488_dilations_0, groups = var_488_groups_0, pad = var_488_pad_0, pad_type = var_488_pad_type_0, strides = var_488_strides_0, weight = model_model_layers_1_self_attn_v_proj_weight_palettized, x = var_463)[name = string("op_488")]; + tensor var_489 = const()[name = string("op_489"), val = tensor([1, 8, 1, 64])]; + tensor var_490 = reshape(shape = var_489, x = var_488)[name = string("op_490")]; + tensor x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor([1, 32, 1, 32])]; + tensor x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_5 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = var_472)[name = string("x1_5")]; + tensor x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor([1, 32, 1, 64])]; + tensor x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_5 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = var_472)[name = string("x2_5")]; + tensor var_504_cast_fp16 = mul(x = x1_5, y = cos_3_cast_fp16)[name = string("op_504_cast_fp16")]; + tensor var_505_cast_fp16 = mul(x = x2_5, y = sin_3_cast_fp16)[name = string("op_505_cast_fp16")]; + tensor var_506_cast_fp16 = sub(x = var_504_cast_fp16, y = var_505_cast_fp16)[name = string("op_506_cast_fp16")]; + tensor var_507_cast_fp16 = mul(x = x2_5, y = cos_3_cast_fp16)[name = string("op_507_cast_fp16")]; + tensor var_508_cast_fp16 = mul(x = x1_5, y = sin_3_cast_fp16)[name = string("op_508_cast_fp16")]; + tensor var_509_cast_fp16 = add(x = var_507_cast_fp16, y = var_508_cast_fp16)[name = string("op_509_cast_fp16")]; + bool rotated_5_interleave_0 = const()[name = string("rotated_5_interleave_0"), val = bool(false)]; + tensor rotated_5_cast_fp16 = concat(axis = var_50, interleave = rotated_5_interleave_0, values = (var_506_cast_fp16, var_509_cast_fp16))[name = string("rotated_5_cast_fp16")]; + tensor x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor([1, 8, 1, 32])]; + tensor x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_7 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = var_481)[name = string("x1_7")]; + tensor x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_7 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = var_481)[name = string("x2_7")]; + tensor var_525_cast_fp16 = mul(x = x1_7, y = cos_3_cast_fp16)[name = string("op_525_cast_fp16")]; + tensor var_526_cast_fp16 = mul(x = x2_7, y = sin_3_cast_fp16)[name = string("op_526_cast_fp16")]; + tensor var_527_cast_fp16 = sub(x = var_525_cast_fp16, y = var_526_cast_fp16)[name = string("op_527_cast_fp16")]; + tensor var_528_cast_fp16 = mul(x = x2_7, y = cos_3_cast_fp16)[name = string("op_528_cast_fp16")]; + tensor var_529_cast_fp16 = mul(x = x1_7, y = sin_3_cast_fp16)[name = string("op_529_cast_fp16")]; + tensor var_530_cast_fp16 = add(x = var_528_cast_fp16, y = var_529_cast_fp16)[name = string("op_530_cast_fp16")]; + bool rotated_7_interleave_0 = const()[name = string("rotated_7_interleave_0"), val = bool(false)]; + tensor rotated_7_cast_fp16 = concat(axis = var_50, interleave = rotated_7_interleave_0, values = (var_527_cast_fp16, var_530_cast_fp16))[name = string("rotated_7_cast_fp16")]; + tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([1])]; + tensor expand_dims_13 = const()[name = string("expand_dims_13"), val = tensor([0])]; + tensor expand_dims_15 = const()[name = string("expand_dims_15"), val = tensor([0])]; + tensor expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor([2])]; + int32 concat_10_axis_0 = const()[name = string("concat_10_axis_0"), val = int32(0)]; + bool concat_10_interleave_0 = const()[name = string("concat_10_interleave_0"), val = bool(false)]; + tensor concat_10 = concat(axis = concat_10_axis_0, interleave = concat_10_interleave_0, values = (expand_dims_12, expand_dims_13, current_pos, expand_dims_15))[name = string("concat_10")]; + tensor concat_11_values1_0 = const()[name = string("concat_11_values1_0"), val = tensor([0])]; + tensor concat_11_values3_0 = const()[name = string("concat_11_values3_0"), val = tensor([0])]; + int32 concat_11_axis_0 = const()[name = string("concat_11_axis_0"), val = int32(0)]; + bool concat_11_interleave_0 = const()[name = string("concat_11_interleave_0"), val = bool(false)]; + tensor concat_11 = concat(axis = concat_11_axis_0, interleave = concat_11_interleave_0, values = (expand_dims_16, concat_11_values1_0, var_350, concat_11_values3_0))[name = string("concat_11")]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_10, begin_mask = model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0, end = concat_11, end_mask = model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_3_stride_0, update = rotated_7_cast_fp16, x = coreml_update_state_17)[name = string("model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_2_write_state")]; + tensor coreml_update_state_18 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_2")]; + tensor expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor([17])]; + tensor expand_dims_19 = const()[name = string("expand_dims_19"), val = tensor([0])]; + tensor expand_dims_21 = const()[name = string("expand_dims_21"), val = tensor([0])]; + tensor expand_dims_22 = const()[name = string("expand_dims_22"), val = tensor([18])]; + int32 concat_14_axis_0 = const()[name = string("concat_14_axis_0"), val = int32(0)]; + bool concat_14_interleave_0 = const()[name = string("concat_14_interleave_0"), val = bool(false)]; + tensor concat_14 = concat(axis = concat_14_axis_0, interleave = concat_14_interleave_0, values = (expand_dims_18, expand_dims_19, current_pos, expand_dims_21))[name = string("concat_14")]; + tensor concat_15_values1_0 = const()[name = string("concat_15_values1_0"), val = tensor([0])]; + tensor concat_15_values3_0 = const()[name = string("concat_15_values3_0"), val = tensor([0])]; + int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)]; + bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)]; + tensor concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (expand_dims_22, concat_15_values1_0, var_350, concat_15_values3_0))[name = string("concat_15")]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_14, begin_mask = model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0, end = concat_15, end_mask = model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_4_stride_0, update = var_490, x = coreml_update_state_18)[name = string("model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_3_write_state")]; + tensor coreml_update_state_19 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_3")]; + tensor var_550_begin_0 = const()[name = string("op_550_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_550_end_0 = const()[name = string("op_550_end_0"), val = tensor([2, 8, 1546, 64])]; + tensor var_550_end_mask_0 = const()[name = string("op_550_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_550_cast_fp16 = slice_by_index(begin = var_550_begin_0, end = var_550_end_0, end_mask = var_550_end_mask_0, x = coreml_update_state_19)[name = string("op_550_cast_fp16")]; + tensor K_layer_cache_3_axes_0 = const()[name = string("K_layer_cache_3_axes_0"), val = tensor([0])]; + tensor K_layer_cache_3_cast_fp16 = squeeze(axes = K_layer_cache_3_axes_0, x = var_550_cast_fp16)[name = string("K_layer_cache_3_cast_fp16")]; + tensor var_552_begin_0 = const()[name = string("op_552_begin_0"), val = tensor([17, 0, 0, 0])]; + tensor var_552_end_0 = const()[name = string("op_552_end_0"), val = tensor([18, 8, 1546, 64])]; + tensor var_552_end_mask_0 = const()[name = string("op_552_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_552_cast_fp16 = slice_by_index(begin = var_552_begin_0, end = var_552_end_0, end_mask = var_552_end_mask_0, x = coreml_update_state_19)[name = string("op_552_cast_fp16")]; + tensor V_layer_cache_3_axes_0 = const()[name = string("V_layer_cache_3_axes_0"), val = tensor([0])]; + tensor V_layer_cache_3_cast_fp16 = squeeze(axes = V_layer_cache_3_axes_0, x = var_552_cast_fp16)[name = string("V_layer_cache_3_cast_fp16")]; + tensor x_39_axes_0 = const()[name = string("x_39_axes_0"), val = tensor([1])]; + tensor x_39_cast_fp16 = expand_dims(axes = x_39_axes_0, x = K_layer_cache_3_cast_fp16)[name = string("x_39_cast_fp16")]; + tensor var_561 = const()[name = string("op_561"), val = tensor([1, 4, 1, 1])]; + tensor x_41_cast_fp16 = tile(reps = var_561, x = x_39_cast_fp16)[name = string("x_41_cast_fp16")]; + tensor var_565 = const()[name = string("op_565"), val = tensor([1, -1, 1546, 64])]; + tensor key_states_7_cast_fp16 = reshape(shape = var_565, x = x_41_cast_fp16)[name = string("key_states_7_cast_fp16")]; + tensor x_45_axes_0 = const()[name = string("x_45_axes_0"), val = tensor([1])]; + tensor x_45_cast_fp16 = expand_dims(axes = x_45_axes_0, x = V_layer_cache_3_cast_fp16)[name = string("x_45_cast_fp16")]; + tensor var_568 = const()[name = string("op_568"), val = tensor([1, 4, 1, 1])]; + tensor x_47_cast_fp16 = tile(reps = var_568, x = x_45_cast_fp16)[name = string("x_47_cast_fp16")]; + tensor var_572 = const()[name = string("op_572"), val = tensor([1, -1, 1546, 64])]; + tensor value_states_7_cast_fp16 = reshape(shape = var_572, x = x_47_cast_fp16)[name = string("value_states_7_cast_fp16")]; + bool var_575_transpose_x_1 = const()[name = string("op_575_transpose_x_1"), val = bool(false)]; + bool var_575_transpose_y_1 = const()[name = string("op_575_transpose_y_1"), val = bool(true)]; + tensor var_575_cast_fp16 = matmul(transpose_x = var_575_transpose_x_1, transpose_y = var_575_transpose_y_1, x = rotated_5_cast_fp16, y = key_states_7_cast_fp16)[name = string("op_575_cast_fp16")]; + fp16 var_576_to_fp16 = const()[name = string("op_576_to_fp16"), val = fp16(0x1p-3)]; + tensor attn_weights_5_cast_fp16 = mul(x = var_575_cast_fp16, y = var_576_to_fp16)[name = string("attn_weights_5_cast_fp16")]; + tensor x_49_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = causal_mask)[name = string("x_49_cast_fp16")]; + tensor reduce_max_1_axes_0 = const()[name = string("reduce_max_1_axes_0"), val = tensor([-1])]; + bool reduce_max_1_keep_dims_0 = const()[name = string("reduce_max_1_keep_dims_0"), val = bool(true)]; + tensor reduce_max_1_cast_fp16 = reduce_max(axes = reduce_max_1_axes_0, keep_dims = reduce_max_1_keep_dims_0, x = x_49_cast_fp16)[name = string("reduce_max_1_cast_fp16")]; + tensor x_51_cast_fp16 = sub(x = x_49_cast_fp16, y = reduce_max_1_cast_fp16)[name = string("x_51_cast_fp16")]; + tensor exp_x_3_cast_fp16 = exp(x = x_51_cast_fp16)[name = string("exp_x_3_cast_fp16")]; + tensor var_587_axes_0 = const()[name = string("op_587_axes_0"), val = tensor([-1])]; + bool var_587_keep_dims_0 = const()[name = string("op_587_keep_dims_0"), val = bool(true)]; + tensor var_587_cast_fp16 = reduce_sum(axes = var_587_axes_0, keep_dims = var_587_keep_dims_0, x = exp_x_3_cast_fp16)[name = string("op_587_cast_fp16")]; + tensor attn_weights_7_cast_fp16 = real_div(x = exp_x_3_cast_fp16, y = var_587_cast_fp16)[name = string("attn_weights_7_cast_fp16")]; + bool attn_output_7_transpose_x_0 = const()[name = string("attn_output_7_transpose_x_0"), val = bool(false)]; + bool attn_output_7_transpose_y_0 = const()[name = string("attn_output_7_transpose_y_0"), val = bool(false)]; + tensor attn_output_7_cast_fp16 = matmul(transpose_x = attn_output_7_transpose_x_0, transpose_y = attn_output_7_transpose_y_0, x = attn_weights_7_cast_fp16, y = value_states_7_cast_fp16)[name = string("attn_output_7_cast_fp16")]; + tensor var_590_perm_0 = const()[name = string("op_590_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_592 = const()[name = string("op_592"), val = tensor([1, 1, 2048])]; + tensor var_590_cast_fp16 = transpose(perm = var_590_perm_0, x = attn_output_7_cast_fp16)[name = string("transpose_26")]; + tensor input_19_cast_fp16 = reshape(shape = var_592, x = var_590_cast_fp16)[name = string("input_19_cast_fp16")]; + tensor model_model_layers_1_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262863424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(264960640))))[name = string("model_model_layers_1_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_1_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_1_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_19_cast_fp16)[name = string("linear_1_cast_fp16")]; + tensor hidden_states_13_cast_fp16 = add(x = hidden_states_9_cast_fp16, y = linear_1_cast_fp16)[name = string("hidden_states_13_cast_fp16")]; + tensor mean_7_axes_0 = const()[name = string("mean_7_axes_0"), val = tensor([-1])]; + bool mean_7_keep_dims_0 = const()[name = string("mean_7_keep_dims_0"), val = bool(true)]; + tensor mean_7_cast_fp16 = reduce_mean(axes = mean_7_axes_0, keep_dims = mean_7_keep_dims_0, x = hidden_states_13_cast_fp16)[name = string("mean_7_cast_fp16")]; + tensor input_21_cast_fp16 = sub(x = hidden_states_13_cast_fp16, y = mean_7_cast_fp16)[name = string("input_21_cast_fp16")]; + tensor var_603_axes_0 = const()[name = string("op_603_axes_0"), val = tensor([-1])]; + tensor model_model_layers_1_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_1_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(264968896)))]; + tensor var_603_cast_fp16 = layer_norm(axes = var_603_axes_0, epsilon = var_45_to_fp16, gamma = model_model_layers_1_post_attention_layernorm_weight_to_fp16, x = input_21_cast_fp16)[name = string("op_603_cast_fp16")]; + tensor var_610 = const()[name = string("op_610"), val = tensor([0, 2, 1])]; + tensor input_23_axes_0 = const()[name = string("input_23_axes_0"), val = tensor([2])]; + tensor var_611 = transpose(perm = var_610, x = var_603_cast_fp16)[name = string("transpose_25")]; + tensor input_23 = expand_dims(axes = input_23_axes_0, x = var_611)[name = string("input_23")]; + string input_25_pad_type_0 = const()[name = string("input_25_pad_type_0"), val = string("valid")]; + tensor input_25_strides_0 = const()[name = string("input_25_strides_0"), val = tensor([1, 1])]; + tensor input_25_pad_0 = const()[name = string("input_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_25_dilations_0 = const()[name = string("input_25_dilations_0"), val = tensor([1, 1])]; + int32 input_25_groups_0 = const()[name = string("input_25_groups_0"), val = int32(1)]; + tensor input_25 = conv(dilations = input_25_dilations_0, groups = input_25_groups_0, pad = input_25_pad_0, pad_type = input_25_pad_type_0, strides = input_25_strides_0, weight = model_model_layers_1_mlp_gate_proj_weight_palettized, x = input_23)[name = string("input_25")]; + string up_states_3_pad_type_0 = const()[name = string("up_states_3_pad_type_0"), val = string("valid")]; + tensor up_states_3_strides_0 = const()[name = string("up_states_3_strides_0"), val = tensor([1, 1])]; + tensor up_states_3_pad_0 = const()[name = string("up_states_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_3_dilations_0 = const()[name = string("up_states_3_dilations_0"), val = tensor([1, 1])]; + int32 up_states_3_groups_0 = const()[name = string("up_states_3_groups_0"), val = int32(1)]; + tensor up_states_3 = conv(dilations = up_states_3_dilations_0, groups = up_states_3_groups_0, pad = up_states_3_pad_0, pad_type = up_states_3_pad_type_0, strides = up_states_3_strides_0, weight = model_model_layers_1_mlp_up_proj_weight_palettized, x = input_23)[name = string("up_states_3")]; + tensor gate_states_3 = silu(x = input_25)[name = string("gate_states_3")]; + tensor input_27 = mul(x = gate_states_3, y = up_states_3)[name = string("input_27")]; + string hidden_states_15_pad_type_0 = const()[name = string("hidden_states_15_pad_type_0"), val = string("valid")]; + tensor hidden_states_15_strides_0 = const()[name = string("hidden_states_15_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_15_pad_0 = const()[name = string("hidden_states_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_15_dilations_0 = const()[name = string("hidden_states_15_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_15_groups_0 = const()[name = string("hidden_states_15_groups_0"), val = int32(1)]; + tensor hidden_states_15 = conv(dilations = hidden_states_15_dilations_0, groups = hidden_states_15_groups_0, pad = hidden_states_15_pad_0, pad_type = hidden_states_15_pad_type_0, strides = hidden_states_15_strides_0, weight = model_model_layers_1_mlp_down_proj_weight_palettized, x = input_27)[name = string("hidden_states_15")]; + tensor var_633_axes_0 = const()[name = string("op_633_axes_0"), val = tensor([2])]; + tensor var_633 = squeeze(axes = var_633_axes_0, x = hidden_states_15)[name = string("op_633")]; + tensor var_634 = const()[name = string("op_634"), val = tensor([0, 2, 1])]; + tensor var_635 = transpose(perm = var_634, x = var_633)[name = string("transpose_24")]; + tensor hidden_states_17_cast_fp16 = add(x = hidden_states_13_cast_fp16, y = var_635)[name = string("hidden_states_17_cast_fp16")]; + tensor mean_9_axes_0 = const()[name = string("mean_9_axes_0"), val = tensor([-1])]; + bool mean_9_keep_dims_0 = const()[name = string("mean_9_keep_dims_0"), val = bool(true)]; + tensor mean_9_cast_fp16 = reduce_mean(axes = mean_9_axes_0, keep_dims = mean_9_keep_dims_0, x = hidden_states_17_cast_fp16)[name = string("mean_9_cast_fp16")]; + tensor input_29_cast_fp16 = sub(x = hidden_states_17_cast_fp16, y = mean_9_cast_fp16)[name = string("input_29_cast_fp16")]; + tensor var_643_axes_0 = const()[name = string("op_643_axes_0"), val = tensor([-1])]; + tensor model_model_layers_2_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_2_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(264973056)))]; + tensor var_643_cast_fp16 = layer_norm(axes = var_643_axes_0, epsilon = var_45_to_fp16, gamma = model_model_layers_2_input_layernorm_weight_to_fp16, x = input_29_cast_fp16)[name = string("op_643_cast_fp16")]; + tensor var_646 = const()[name = string("op_646"), val = tensor([0, 2, 1])]; + tensor var_648_axes_0 = const()[name = string("op_648_axes_0"), val = tensor([2])]; + tensor var_647 = transpose(perm = var_646, x = var_643_cast_fp16)[name = string("transpose_23")]; + tensor var_648 = expand_dims(axes = var_648_axes_0, x = var_647)[name = string("op_648")]; + string var_655_pad_type_0 = const()[name = string("op_655_pad_type_0"), val = string("valid")]; + tensor var_655_strides_0 = const()[name = string("op_655_strides_0"), val = tensor([1, 1])]; + tensor var_655_pad_0 = const()[name = string("op_655_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_655_dilations_0 = const()[name = string("op_655_dilations_0"), val = tensor([1, 1])]; + int32 var_655_groups_0 = const()[name = string("op_655_groups_0"), val = int32(1)]; + tensor var_655 = conv(dilations = var_655_dilations_0, groups = var_655_groups_0, pad = var_655_pad_0, pad_type = var_655_pad_type_0, strides = var_655_strides_0, weight = model_model_layers_2_self_attn_q_proj_weight_palettized, x = var_648)[name = string("op_655")]; + tensor var_656 = const()[name = string("op_656"), val = tensor([1, 32, 1, 64])]; + tensor var_657 = reshape(shape = var_656, x = var_655)[name = string("op_657")]; + string var_664_pad_type_0 = const()[name = string("op_664_pad_type_0"), val = string("valid")]; + tensor var_664_strides_0 = const()[name = string("op_664_strides_0"), val = tensor([1, 1])]; + tensor var_664_pad_0 = const()[name = string("op_664_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_664_dilations_0 = const()[name = string("op_664_dilations_0"), val = tensor([1, 1])]; + int32 var_664_groups_0 = const()[name = string("op_664_groups_0"), val = int32(1)]; + tensor var_664 = conv(dilations = var_664_dilations_0, groups = var_664_groups_0, pad = var_664_pad_0, pad_type = var_664_pad_type_0, strides = var_664_strides_0, weight = model_model_layers_2_self_attn_k_proj_weight_palettized, x = var_648)[name = string("op_664")]; + tensor var_665 = const()[name = string("op_665"), val = tensor([1, 8, 1, 64])]; + tensor var_666 = reshape(shape = var_665, x = var_664)[name = string("op_666")]; + string var_673_pad_type_0 = const()[name = string("op_673_pad_type_0"), val = string("valid")]; + tensor var_673_strides_0 = const()[name = string("op_673_strides_0"), val = tensor([1, 1])]; + tensor var_673_pad_0 = const()[name = string("op_673_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_673_dilations_0 = const()[name = string("op_673_dilations_0"), val = tensor([1, 1])]; + int32 var_673_groups_0 = const()[name = string("op_673_groups_0"), val = int32(1)]; + tensor var_673 = conv(dilations = var_673_dilations_0, groups = var_673_groups_0, pad = var_673_pad_0, pad_type = var_673_pad_type_0, strides = var_673_strides_0, weight = model_model_layers_2_self_attn_v_proj_weight_palettized, x = var_648)[name = string("op_673")]; + tensor var_674 = const()[name = string("op_674"), val = tensor([1, 8, 1, 64])]; + tensor var_675 = reshape(shape = var_674, x = var_673)[name = string("op_675")]; + tensor x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor([1, 32, 1, 32])]; + tensor x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_9 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = var_657)[name = string("x1_9")]; + tensor x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor([1, 32, 1, 64])]; + tensor x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_9 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = var_657)[name = string("x2_9")]; + tensor var_689_cast_fp16 = mul(x = x1_9, y = cos_3_cast_fp16)[name = string("op_689_cast_fp16")]; + tensor var_690_cast_fp16 = mul(x = x2_9, y = sin_3_cast_fp16)[name = string("op_690_cast_fp16")]; + tensor var_691_cast_fp16 = sub(x = var_689_cast_fp16, y = var_690_cast_fp16)[name = string("op_691_cast_fp16")]; + tensor var_692_cast_fp16 = mul(x = x2_9, y = cos_3_cast_fp16)[name = string("op_692_cast_fp16")]; + tensor var_693_cast_fp16 = mul(x = x1_9, y = sin_3_cast_fp16)[name = string("op_693_cast_fp16")]; + tensor var_694_cast_fp16 = add(x = var_692_cast_fp16, y = var_693_cast_fp16)[name = string("op_694_cast_fp16")]; + bool rotated_9_interleave_0 = const()[name = string("rotated_9_interleave_0"), val = bool(false)]; + tensor rotated_9_cast_fp16 = concat(axis = var_50, interleave = rotated_9_interleave_0, values = (var_691_cast_fp16, var_694_cast_fp16))[name = string("rotated_9_cast_fp16")]; + tensor x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor([1, 8, 1, 32])]; + tensor x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_11 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = var_666)[name = string("x1_11")]; + tensor x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_11 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = var_666)[name = string("x2_11")]; + tensor var_710_cast_fp16 = mul(x = x1_11, y = cos_3_cast_fp16)[name = string("op_710_cast_fp16")]; + tensor var_711_cast_fp16 = mul(x = x2_11, y = sin_3_cast_fp16)[name = string("op_711_cast_fp16")]; + tensor var_712_cast_fp16 = sub(x = var_710_cast_fp16, y = var_711_cast_fp16)[name = string("op_712_cast_fp16")]; + tensor var_713_cast_fp16 = mul(x = x2_11, y = cos_3_cast_fp16)[name = string("op_713_cast_fp16")]; + tensor var_714_cast_fp16 = mul(x = x1_11, y = sin_3_cast_fp16)[name = string("op_714_cast_fp16")]; + tensor var_715_cast_fp16 = add(x = var_713_cast_fp16, y = var_714_cast_fp16)[name = string("op_715_cast_fp16")]; + bool rotated_11_interleave_0 = const()[name = string("rotated_11_interleave_0"), val = bool(false)]; + tensor rotated_11_cast_fp16 = concat(axis = var_50, interleave = rotated_11_interleave_0, values = (var_712_cast_fp16, var_715_cast_fp16))[name = string("rotated_11_cast_fp16")]; + tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([2])]; + tensor expand_dims_25 = const()[name = string("expand_dims_25"), val = tensor([0])]; + tensor expand_dims_27 = const()[name = string("expand_dims_27"), val = tensor([0])]; + tensor expand_dims_28 = const()[name = string("expand_dims_28"), val = tensor([3])]; + int32 concat_18_axis_0 = const()[name = string("concat_18_axis_0"), val = int32(0)]; + bool concat_18_interleave_0 = const()[name = string("concat_18_interleave_0"), val = bool(false)]; + tensor concat_18 = concat(axis = concat_18_axis_0, interleave = concat_18_interleave_0, values = (expand_dims_24, expand_dims_25, current_pos, expand_dims_27))[name = string("concat_18")]; + tensor concat_19_values1_0 = const()[name = string("concat_19_values1_0"), val = tensor([0])]; + tensor concat_19_values3_0 = const()[name = string("concat_19_values3_0"), val = tensor([0])]; + int32 concat_19_axis_0 = const()[name = string("concat_19_axis_0"), val = int32(0)]; + bool concat_19_interleave_0 = const()[name = string("concat_19_interleave_0"), val = bool(false)]; + tensor concat_19 = concat(axis = concat_19_axis_0, interleave = concat_19_interleave_0, values = (expand_dims_28, concat_19_values1_0, var_350, concat_19_values3_0))[name = string("concat_19")]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_18, begin_mask = model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0, end = concat_19, end_mask = model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_5_stride_0, update = rotated_11_cast_fp16, x = coreml_update_state_19)[name = string("model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_4_write_state")]; + tensor coreml_update_state_20 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_4")]; + tensor expand_dims_30 = const()[name = string("expand_dims_30"), val = tensor([18])]; + tensor expand_dims_31 = const()[name = string("expand_dims_31"), val = tensor([0])]; + tensor expand_dims_33 = const()[name = string("expand_dims_33"), val = tensor([0])]; + tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([19])]; + int32 concat_22_axis_0 = const()[name = string("concat_22_axis_0"), val = int32(0)]; + bool concat_22_interleave_0 = const()[name = string("concat_22_interleave_0"), val = bool(false)]; + tensor concat_22 = concat(axis = concat_22_axis_0, interleave = concat_22_interleave_0, values = (expand_dims_30, expand_dims_31, current_pos, expand_dims_33))[name = string("concat_22")]; + tensor concat_23_values1_0 = const()[name = string("concat_23_values1_0"), val = tensor([0])]; + tensor concat_23_values3_0 = const()[name = string("concat_23_values3_0"), val = tensor([0])]; + int32 concat_23_axis_0 = const()[name = string("concat_23_axis_0"), val = int32(0)]; + bool concat_23_interleave_0 = const()[name = string("concat_23_interleave_0"), val = bool(false)]; + tensor concat_23 = concat(axis = concat_23_axis_0, interleave = concat_23_interleave_0, values = (expand_dims_34, concat_23_values1_0, var_350, concat_23_values3_0))[name = string("concat_23")]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_22, begin_mask = model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0, end = concat_23, end_mask = model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_6_stride_0, update = var_675, x = coreml_update_state_20)[name = string("model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_5_write_state")]; + tensor coreml_update_state_21 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_5")]; + tensor var_735_begin_0 = const()[name = string("op_735_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor var_735_end_0 = const()[name = string("op_735_end_0"), val = tensor([3, 8, 1546, 64])]; + tensor var_735_end_mask_0 = const()[name = string("op_735_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_735_cast_fp16 = slice_by_index(begin = var_735_begin_0, end = var_735_end_0, end_mask = var_735_end_mask_0, x = coreml_update_state_21)[name = string("op_735_cast_fp16")]; + tensor K_layer_cache_5_axes_0 = const()[name = string("K_layer_cache_5_axes_0"), val = tensor([0])]; + tensor K_layer_cache_5_cast_fp16 = squeeze(axes = K_layer_cache_5_axes_0, x = var_735_cast_fp16)[name = string("K_layer_cache_5_cast_fp16")]; + tensor var_737_begin_0 = const()[name = string("op_737_begin_0"), val = tensor([18, 0, 0, 0])]; + tensor var_737_end_0 = const()[name = string("op_737_end_0"), val = tensor([19, 8, 1546, 64])]; + tensor var_737_end_mask_0 = const()[name = string("op_737_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_737_cast_fp16 = slice_by_index(begin = var_737_begin_0, end = var_737_end_0, end_mask = var_737_end_mask_0, x = coreml_update_state_21)[name = string("op_737_cast_fp16")]; + tensor V_layer_cache_5_axes_0 = const()[name = string("V_layer_cache_5_axes_0"), val = tensor([0])]; + tensor V_layer_cache_5_cast_fp16 = squeeze(axes = V_layer_cache_5_axes_0, x = var_737_cast_fp16)[name = string("V_layer_cache_5_cast_fp16")]; + tensor x_67_axes_0 = const()[name = string("x_67_axes_0"), val = tensor([1])]; + tensor x_67_cast_fp16 = expand_dims(axes = x_67_axes_0, x = K_layer_cache_5_cast_fp16)[name = string("x_67_cast_fp16")]; + tensor var_746 = const()[name = string("op_746"), val = tensor([1, 4, 1, 1])]; + tensor x_69_cast_fp16 = tile(reps = var_746, x = x_67_cast_fp16)[name = string("x_69_cast_fp16")]; + tensor var_750 = const()[name = string("op_750"), val = tensor([1, -1, 1546, 64])]; + tensor key_states_11_cast_fp16 = reshape(shape = var_750, x = x_69_cast_fp16)[name = string("key_states_11_cast_fp16")]; + tensor x_73_axes_0 = const()[name = string("x_73_axes_0"), val = tensor([1])]; + tensor x_73_cast_fp16 = expand_dims(axes = x_73_axes_0, x = V_layer_cache_5_cast_fp16)[name = string("x_73_cast_fp16")]; + tensor var_753 = const()[name = string("op_753"), val = tensor([1, 4, 1, 1])]; + tensor x_75_cast_fp16 = tile(reps = var_753, x = x_73_cast_fp16)[name = string("x_75_cast_fp16")]; + tensor var_757 = const()[name = string("op_757"), val = tensor([1, -1, 1546, 64])]; + tensor value_states_11_cast_fp16 = reshape(shape = var_757, x = x_75_cast_fp16)[name = string("value_states_11_cast_fp16")]; + bool var_760_transpose_x_1 = const()[name = string("op_760_transpose_x_1"), val = bool(false)]; + bool var_760_transpose_y_1 = const()[name = string("op_760_transpose_y_1"), val = bool(true)]; + tensor var_760_cast_fp16 = matmul(transpose_x = var_760_transpose_x_1, transpose_y = var_760_transpose_y_1, x = rotated_9_cast_fp16, y = key_states_11_cast_fp16)[name = string("op_760_cast_fp16")]; + fp16 var_761_to_fp16 = const()[name = string("op_761_to_fp16"), val = fp16(0x1p-3)]; + tensor attn_weights_9_cast_fp16 = mul(x = var_760_cast_fp16, y = var_761_to_fp16)[name = string("attn_weights_9_cast_fp16")]; + tensor x_77_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = causal_mask)[name = string("x_77_cast_fp16")]; + tensor reduce_max_2_axes_0 = const()[name = string("reduce_max_2_axes_0"), val = tensor([-1])]; + bool reduce_max_2_keep_dims_0 = const()[name = string("reduce_max_2_keep_dims_0"), val = bool(true)]; + tensor reduce_max_2_cast_fp16 = reduce_max(axes = reduce_max_2_axes_0, keep_dims = reduce_max_2_keep_dims_0, x = x_77_cast_fp16)[name = string("reduce_max_2_cast_fp16")]; + tensor x_79_cast_fp16 = sub(x = x_77_cast_fp16, y = reduce_max_2_cast_fp16)[name = string("x_79_cast_fp16")]; + tensor exp_x_5_cast_fp16 = exp(x = x_79_cast_fp16)[name = string("exp_x_5_cast_fp16")]; + tensor var_772_axes_0 = const()[name = string("op_772_axes_0"), val = tensor([-1])]; + bool var_772_keep_dims_0 = const()[name = string("op_772_keep_dims_0"), val = bool(true)]; + tensor var_772_cast_fp16 = reduce_sum(axes = var_772_axes_0, keep_dims = var_772_keep_dims_0, x = exp_x_5_cast_fp16)[name = string("op_772_cast_fp16")]; + tensor attn_weights_11_cast_fp16 = real_div(x = exp_x_5_cast_fp16, y = var_772_cast_fp16)[name = string("attn_weights_11_cast_fp16")]; + bool attn_output_13_transpose_x_0 = const()[name = string("attn_output_13_transpose_x_0"), val = bool(false)]; + bool attn_output_13_transpose_y_0 = const()[name = string("attn_output_13_transpose_y_0"), val = bool(false)]; + tensor attn_output_13_cast_fp16 = matmul(transpose_x = attn_output_13_transpose_x_0, transpose_y = attn_output_13_transpose_y_0, x = attn_weights_11_cast_fp16, y = value_states_11_cast_fp16)[name = string("attn_output_13_cast_fp16")]; + tensor var_775_perm_0 = const()[name = string("op_775_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_777 = const()[name = string("op_777"), val = tensor([1, 1, 2048])]; + tensor var_775_cast_fp16 = transpose(perm = var_775_perm_0, x = attn_output_13_cast_fp16)[name = string("transpose_22")]; + tensor input_33_cast_fp16 = reshape(shape = var_777, x = var_775_cast_fp16)[name = string("input_33_cast_fp16")]; + tensor model_model_layers_2_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(264977216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267074432))))[name = string("model_model_layers_2_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_2_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_2_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_33_cast_fp16)[name = string("linear_2_cast_fp16")]; + tensor hidden_states_21_cast_fp16 = add(x = hidden_states_17_cast_fp16, y = linear_2_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; + tensor mean_11_axes_0 = const()[name = string("mean_11_axes_0"), val = tensor([-1])]; + bool mean_11_keep_dims_0 = const()[name = string("mean_11_keep_dims_0"), val = bool(true)]; + tensor mean_11_cast_fp16 = reduce_mean(axes = mean_11_axes_0, keep_dims = mean_11_keep_dims_0, x = hidden_states_21_cast_fp16)[name = string("mean_11_cast_fp16")]; + tensor input_35_cast_fp16 = sub(x = hidden_states_21_cast_fp16, y = mean_11_cast_fp16)[name = string("input_35_cast_fp16")]; + tensor var_788_axes_0 = const()[name = string("op_788_axes_0"), val = tensor([-1])]; + tensor model_model_layers_2_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_2_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267082688)))]; + tensor var_788_cast_fp16 = layer_norm(axes = var_788_axes_0, epsilon = var_45_to_fp16, gamma = model_model_layers_2_post_attention_layernorm_weight_to_fp16, x = input_35_cast_fp16)[name = string("op_788_cast_fp16")]; + tensor var_795 = const()[name = string("op_795"), val = tensor([0, 2, 1])]; + tensor input_37_axes_0 = const()[name = string("input_37_axes_0"), val = tensor([2])]; + tensor var_796 = transpose(perm = var_795, x = var_788_cast_fp16)[name = string("transpose_21")]; + tensor input_37 = expand_dims(axes = input_37_axes_0, x = var_796)[name = string("input_37")]; + string input_39_pad_type_0 = const()[name = string("input_39_pad_type_0"), val = string("valid")]; + tensor input_39_strides_0 = const()[name = string("input_39_strides_0"), val = tensor([1, 1])]; + tensor input_39_pad_0 = const()[name = string("input_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_39_dilations_0 = const()[name = string("input_39_dilations_0"), val = tensor([1, 1])]; + int32 input_39_groups_0 = const()[name = string("input_39_groups_0"), val = int32(1)]; + tensor input_39 = conv(dilations = input_39_dilations_0, groups = input_39_groups_0, pad = input_39_pad_0, pad_type = input_39_pad_type_0, strides = input_39_strides_0, weight = model_model_layers_2_mlp_gate_proj_weight_palettized, x = input_37)[name = string("input_39")]; + string up_states_5_pad_type_0 = const()[name = string("up_states_5_pad_type_0"), val = string("valid")]; + tensor up_states_5_strides_0 = const()[name = string("up_states_5_strides_0"), val = tensor([1, 1])]; + tensor up_states_5_pad_0 = const()[name = string("up_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_5_dilations_0 = const()[name = string("up_states_5_dilations_0"), val = tensor([1, 1])]; + int32 up_states_5_groups_0 = const()[name = string("up_states_5_groups_0"), val = int32(1)]; + tensor up_states_5 = conv(dilations = up_states_5_dilations_0, groups = up_states_5_groups_0, pad = up_states_5_pad_0, pad_type = up_states_5_pad_type_0, strides = up_states_5_strides_0, weight = model_model_layers_2_mlp_up_proj_weight_palettized, x = input_37)[name = string("up_states_5")]; + tensor gate_states_5 = silu(x = input_39)[name = string("gate_states_5")]; + tensor input_41 = mul(x = gate_states_5, y = up_states_5)[name = string("input_41")]; + string hidden_states_23_pad_type_0 = const()[name = string("hidden_states_23_pad_type_0"), val = string("valid")]; + tensor hidden_states_23_strides_0 = const()[name = string("hidden_states_23_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_23_pad_0 = const()[name = string("hidden_states_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_23_dilations_0 = const()[name = string("hidden_states_23_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_23_groups_0 = const()[name = string("hidden_states_23_groups_0"), val = int32(1)]; + tensor hidden_states_23 = conv(dilations = hidden_states_23_dilations_0, groups = hidden_states_23_groups_0, pad = hidden_states_23_pad_0, pad_type = hidden_states_23_pad_type_0, strides = hidden_states_23_strides_0, weight = model_model_layers_2_mlp_down_proj_weight_palettized, x = input_41)[name = string("hidden_states_23")]; + tensor var_818_axes_0 = const()[name = string("op_818_axes_0"), val = tensor([2])]; + tensor var_818 = squeeze(axes = var_818_axes_0, x = hidden_states_23)[name = string("op_818")]; + tensor var_819 = const()[name = string("op_819"), val = tensor([0, 2, 1])]; + tensor var_820 = transpose(perm = var_819, x = var_818)[name = string("transpose_20")]; + tensor hidden_states_25_cast_fp16 = add(x = hidden_states_21_cast_fp16, y = var_820)[name = string("hidden_states_25_cast_fp16")]; + tensor mean_13_axes_0 = const()[name = string("mean_13_axes_0"), val = tensor([-1])]; + bool mean_13_keep_dims_0 = const()[name = string("mean_13_keep_dims_0"), val = bool(true)]; + tensor mean_13_cast_fp16 = reduce_mean(axes = mean_13_axes_0, keep_dims = mean_13_keep_dims_0, x = hidden_states_25_cast_fp16)[name = string("mean_13_cast_fp16")]; + tensor input_43_cast_fp16 = sub(x = hidden_states_25_cast_fp16, y = mean_13_cast_fp16)[name = string("input_43_cast_fp16")]; + tensor var_828_axes_0 = const()[name = string("op_828_axes_0"), val = tensor([-1])]; + tensor model_model_layers_3_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_3_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267086848)))]; + tensor var_828_cast_fp16 = layer_norm(axes = var_828_axes_0, epsilon = var_45_to_fp16, gamma = model_model_layers_3_input_layernorm_weight_to_fp16, x = input_43_cast_fp16)[name = string("op_828_cast_fp16")]; + tensor var_831 = const()[name = string("op_831"), val = tensor([0, 2, 1])]; + tensor var_833_axes_0 = const()[name = string("op_833_axes_0"), val = tensor([2])]; + tensor var_832 = transpose(perm = var_831, x = var_828_cast_fp16)[name = string("transpose_19")]; + tensor var_833 = expand_dims(axes = var_833_axes_0, x = var_832)[name = string("op_833")]; + string var_840_pad_type_0 = const()[name = string("op_840_pad_type_0"), val = string("valid")]; + tensor var_840_strides_0 = const()[name = string("op_840_strides_0"), val = tensor([1, 1])]; + tensor var_840_pad_0 = const()[name = string("op_840_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_840_dilations_0 = const()[name = string("op_840_dilations_0"), val = tensor([1, 1])]; + int32 var_840_groups_0 = const()[name = string("op_840_groups_0"), val = int32(1)]; + tensor var_840 = conv(dilations = var_840_dilations_0, groups = var_840_groups_0, pad = var_840_pad_0, pad_type = var_840_pad_type_0, strides = var_840_strides_0, weight = model_model_layers_3_self_attn_q_proj_weight_palettized, x = var_833)[name = string("op_840")]; + tensor var_841 = const()[name = string("op_841"), val = tensor([1, 32, 1, 64])]; + tensor var_842 = reshape(shape = var_841, x = var_840)[name = string("op_842")]; + string var_849_pad_type_0 = const()[name = string("op_849_pad_type_0"), val = string("valid")]; + tensor var_849_strides_0 = const()[name = string("op_849_strides_0"), val = tensor([1, 1])]; + tensor var_849_pad_0 = const()[name = string("op_849_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_849_dilations_0 = const()[name = string("op_849_dilations_0"), val = tensor([1, 1])]; + int32 var_849_groups_0 = const()[name = string("op_849_groups_0"), val = int32(1)]; + tensor var_849 = conv(dilations = var_849_dilations_0, groups = var_849_groups_0, pad = var_849_pad_0, pad_type = var_849_pad_type_0, strides = var_849_strides_0, weight = model_model_layers_3_self_attn_k_proj_weight_palettized, x = var_833)[name = string("op_849")]; + tensor var_850 = const()[name = string("op_850"), val = tensor([1, 8, 1, 64])]; + tensor var_851 = reshape(shape = var_850, x = var_849)[name = string("op_851")]; + string var_858_pad_type_0 = const()[name = string("op_858_pad_type_0"), val = string("valid")]; + tensor var_858_strides_0 = const()[name = string("op_858_strides_0"), val = tensor([1, 1])]; + tensor var_858_pad_0 = const()[name = string("op_858_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_858_dilations_0 = const()[name = string("op_858_dilations_0"), val = tensor([1, 1])]; + int32 var_858_groups_0 = const()[name = string("op_858_groups_0"), val = int32(1)]; + tensor var_858 = conv(dilations = var_858_dilations_0, groups = var_858_groups_0, pad = var_858_pad_0, pad_type = var_858_pad_type_0, strides = var_858_strides_0, weight = model_model_layers_3_self_attn_v_proj_weight_palettized, x = var_833)[name = string("op_858")]; + tensor var_859 = const()[name = string("op_859"), val = tensor([1, 8, 1, 64])]; + tensor var_860 = reshape(shape = var_859, x = var_858)[name = string("op_860")]; + tensor x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor([1, 32, 1, 32])]; + tensor x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_13 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = var_842)[name = string("x1_13")]; + tensor x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor([1, 32, 1, 64])]; + tensor x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_13 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = var_842)[name = string("x2_13")]; + tensor var_874_cast_fp16 = mul(x = x1_13, y = cos_3_cast_fp16)[name = string("op_874_cast_fp16")]; + tensor var_875_cast_fp16 = mul(x = x2_13, y = sin_3_cast_fp16)[name = string("op_875_cast_fp16")]; + tensor var_876_cast_fp16 = sub(x = var_874_cast_fp16, y = var_875_cast_fp16)[name = string("op_876_cast_fp16")]; + tensor var_877_cast_fp16 = mul(x = x2_13, y = cos_3_cast_fp16)[name = string("op_877_cast_fp16")]; + tensor var_878_cast_fp16 = mul(x = x1_13, y = sin_3_cast_fp16)[name = string("op_878_cast_fp16")]; + tensor var_879_cast_fp16 = add(x = var_877_cast_fp16, y = var_878_cast_fp16)[name = string("op_879_cast_fp16")]; + bool rotated_13_interleave_0 = const()[name = string("rotated_13_interleave_0"), val = bool(false)]; + tensor rotated_13_cast_fp16 = concat(axis = var_50, interleave = rotated_13_interleave_0, values = (var_876_cast_fp16, var_879_cast_fp16))[name = string("rotated_13_cast_fp16")]; + tensor x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor([1, 8, 1, 32])]; + tensor x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_15 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = var_851)[name = string("x1_15")]; + tensor x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_15 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = var_851)[name = string("x2_15")]; + tensor var_895_cast_fp16 = mul(x = x1_15, y = cos_3_cast_fp16)[name = string("op_895_cast_fp16")]; + tensor var_896_cast_fp16 = mul(x = x2_15, y = sin_3_cast_fp16)[name = string("op_896_cast_fp16")]; + tensor var_897_cast_fp16 = sub(x = var_895_cast_fp16, y = var_896_cast_fp16)[name = string("op_897_cast_fp16")]; + tensor var_898_cast_fp16 = mul(x = x2_15, y = cos_3_cast_fp16)[name = string("op_898_cast_fp16")]; + tensor var_899_cast_fp16 = mul(x = x1_15, y = sin_3_cast_fp16)[name = string("op_899_cast_fp16")]; + tensor var_900_cast_fp16 = add(x = var_898_cast_fp16, y = var_899_cast_fp16)[name = string("op_900_cast_fp16")]; + bool rotated_15_interleave_0 = const()[name = string("rotated_15_interleave_0"), val = bool(false)]; + tensor rotated_15_cast_fp16 = concat(axis = var_50, interleave = rotated_15_interleave_0, values = (var_897_cast_fp16, var_900_cast_fp16))[name = string("rotated_15_cast_fp16")]; + tensor expand_dims_36 = const()[name = string("expand_dims_36"), val = tensor([3])]; + tensor expand_dims_37 = const()[name = string("expand_dims_37"), val = tensor([0])]; + tensor expand_dims_39 = const()[name = string("expand_dims_39"), val = tensor([0])]; + tensor expand_dims_40 = const()[name = string("expand_dims_40"), val = tensor([4])]; + int32 concat_26_axis_0 = const()[name = string("concat_26_axis_0"), val = int32(0)]; + bool concat_26_interleave_0 = const()[name = string("concat_26_interleave_0"), val = bool(false)]; + tensor concat_26 = concat(axis = concat_26_axis_0, interleave = concat_26_interleave_0, values = (expand_dims_36, expand_dims_37, current_pos, expand_dims_39))[name = string("concat_26")]; + tensor concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor([0])]; + tensor concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor([0])]; + int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)]; + bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)]; + tensor concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (expand_dims_40, concat_27_values1_0, var_350, concat_27_values3_0))[name = string("concat_27")]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_26, begin_mask = model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0, end = concat_27, end_mask = model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_7_stride_0, update = rotated_15_cast_fp16, x = coreml_update_state_21)[name = string("model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_6_write_state")]; + tensor coreml_update_state_22 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_6")]; + tensor expand_dims_42 = const()[name = string("expand_dims_42"), val = tensor([19])]; + tensor expand_dims_43 = const()[name = string("expand_dims_43"), val = tensor([0])]; + tensor expand_dims_45 = const()[name = string("expand_dims_45"), val = tensor([0])]; + tensor expand_dims_46 = const()[name = string("expand_dims_46"), val = tensor([20])]; + int32 concat_30_axis_0 = const()[name = string("concat_30_axis_0"), val = int32(0)]; + bool concat_30_interleave_0 = const()[name = string("concat_30_interleave_0"), val = bool(false)]; + tensor concat_30 = concat(axis = concat_30_axis_0, interleave = concat_30_interleave_0, values = (expand_dims_42, expand_dims_43, current_pos, expand_dims_45))[name = string("concat_30")]; + tensor concat_31_values1_0 = const()[name = string("concat_31_values1_0"), val = tensor([0])]; + tensor concat_31_values3_0 = const()[name = string("concat_31_values3_0"), val = tensor([0])]; + int32 concat_31_axis_0 = const()[name = string("concat_31_axis_0"), val = int32(0)]; + bool concat_31_interleave_0 = const()[name = string("concat_31_interleave_0"), val = bool(false)]; + tensor concat_31 = concat(axis = concat_31_axis_0, interleave = concat_31_interleave_0, values = (expand_dims_46, concat_31_values1_0, var_350, concat_31_values3_0))[name = string("concat_31")]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_30, begin_mask = model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0, end = concat_31, end_mask = model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_8_stride_0, update = var_860, x = coreml_update_state_22)[name = string("model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_7_write_state")]; + tensor coreml_update_state_23 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_7")]; + tensor var_920_begin_0 = const()[name = string("op_920_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor var_920_end_0 = const()[name = string("op_920_end_0"), val = tensor([4, 8, 1546, 64])]; + tensor var_920_end_mask_0 = const()[name = string("op_920_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_920_cast_fp16 = slice_by_index(begin = var_920_begin_0, end = var_920_end_0, end_mask = var_920_end_mask_0, x = coreml_update_state_23)[name = string("op_920_cast_fp16")]; + tensor K_layer_cache_7_axes_0 = const()[name = string("K_layer_cache_7_axes_0"), val = tensor([0])]; + tensor K_layer_cache_7_cast_fp16 = squeeze(axes = K_layer_cache_7_axes_0, x = var_920_cast_fp16)[name = string("K_layer_cache_7_cast_fp16")]; + tensor var_922_begin_0 = const()[name = string("op_922_begin_0"), val = tensor([19, 0, 0, 0])]; + tensor var_922_end_0 = const()[name = string("op_922_end_0"), val = tensor([20, 8, 1546, 64])]; + tensor var_922_end_mask_0 = const()[name = string("op_922_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_922_cast_fp16 = slice_by_index(begin = var_922_begin_0, end = var_922_end_0, end_mask = var_922_end_mask_0, x = coreml_update_state_23)[name = string("op_922_cast_fp16")]; + tensor V_layer_cache_7_axes_0 = const()[name = string("V_layer_cache_7_axes_0"), val = tensor([0])]; + tensor V_layer_cache_7_cast_fp16 = squeeze(axes = V_layer_cache_7_axes_0, x = var_922_cast_fp16)[name = string("V_layer_cache_7_cast_fp16")]; + tensor x_95_axes_0 = const()[name = string("x_95_axes_0"), val = tensor([1])]; + tensor x_95_cast_fp16 = expand_dims(axes = x_95_axes_0, x = K_layer_cache_7_cast_fp16)[name = string("x_95_cast_fp16")]; + tensor var_931 = const()[name = string("op_931"), val = tensor([1, 4, 1, 1])]; + tensor x_97_cast_fp16 = tile(reps = var_931, x = x_95_cast_fp16)[name = string("x_97_cast_fp16")]; + tensor var_935 = const()[name = string("op_935"), val = tensor([1, -1, 1546, 64])]; + tensor key_states_15_cast_fp16 = reshape(shape = var_935, x = x_97_cast_fp16)[name = string("key_states_15_cast_fp16")]; + tensor x_101_axes_0 = const()[name = string("x_101_axes_0"), val = tensor([1])]; + tensor x_101_cast_fp16 = expand_dims(axes = x_101_axes_0, x = V_layer_cache_7_cast_fp16)[name = string("x_101_cast_fp16")]; + tensor var_938 = const()[name = string("op_938"), val = tensor([1, 4, 1, 1])]; + tensor x_103_cast_fp16 = tile(reps = var_938, x = x_101_cast_fp16)[name = string("x_103_cast_fp16")]; + tensor var_942 = const()[name = string("op_942"), val = tensor([1, -1, 1546, 64])]; + tensor value_states_15_cast_fp16 = reshape(shape = var_942, x = x_103_cast_fp16)[name = string("value_states_15_cast_fp16")]; + bool var_945_transpose_x_1 = const()[name = string("op_945_transpose_x_1"), val = bool(false)]; + bool var_945_transpose_y_1 = const()[name = string("op_945_transpose_y_1"), val = bool(true)]; + tensor var_945_cast_fp16 = matmul(transpose_x = var_945_transpose_x_1, transpose_y = var_945_transpose_y_1, x = rotated_13_cast_fp16, y = key_states_15_cast_fp16)[name = string("op_945_cast_fp16")]; + fp16 var_946_to_fp16 = const()[name = string("op_946_to_fp16"), val = fp16(0x1p-3)]; + tensor attn_weights_13_cast_fp16 = mul(x = var_945_cast_fp16, y = var_946_to_fp16)[name = string("attn_weights_13_cast_fp16")]; + tensor x_105_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = causal_mask)[name = string("x_105_cast_fp16")]; + tensor reduce_max_3_axes_0 = const()[name = string("reduce_max_3_axes_0"), val = tensor([-1])]; + bool reduce_max_3_keep_dims_0 = const()[name = string("reduce_max_3_keep_dims_0"), val = bool(true)]; + tensor reduce_max_3_cast_fp16 = reduce_max(axes = reduce_max_3_axes_0, keep_dims = reduce_max_3_keep_dims_0, x = x_105_cast_fp16)[name = string("reduce_max_3_cast_fp16")]; + tensor x_107_cast_fp16 = sub(x = x_105_cast_fp16, y = reduce_max_3_cast_fp16)[name = string("x_107_cast_fp16")]; + tensor exp_x_7_cast_fp16 = exp(x = x_107_cast_fp16)[name = string("exp_x_7_cast_fp16")]; + tensor var_957_axes_0 = const()[name = string("op_957_axes_0"), val = tensor([-1])]; + bool var_957_keep_dims_0 = const()[name = string("op_957_keep_dims_0"), val = bool(true)]; + tensor var_957_cast_fp16 = reduce_sum(axes = var_957_axes_0, keep_dims = var_957_keep_dims_0, x = exp_x_7_cast_fp16)[name = string("op_957_cast_fp16")]; + tensor attn_weights_15_cast_fp16 = real_div(x = exp_x_7_cast_fp16, y = var_957_cast_fp16)[name = string("attn_weights_15_cast_fp16")]; + bool attn_output_19_transpose_x_0 = const()[name = string("attn_output_19_transpose_x_0"), val = bool(false)]; + bool attn_output_19_transpose_y_0 = const()[name = string("attn_output_19_transpose_y_0"), val = bool(false)]; + tensor attn_output_19_cast_fp16 = matmul(transpose_x = attn_output_19_transpose_x_0, transpose_y = attn_output_19_transpose_y_0, x = attn_weights_15_cast_fp16, y = value_states_15_cast_fp16)[name = string("attn_output_19_cast_fp16")]; + tensor var_960_perm_0 = const()[name = string("op_960_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_962 = const()[name = string("op_962"), val = tensor([1, 1, 2048])]; + tensor var_960_cast_fp16 = transpose(perm = var_960_perm_0, x = attn_output_19_cast_fp16)[name = string("transpose_18")]; + tensor input_47_cast_fp16 = reshape(shape = var_962, x = var_960_cast_fp16)[name = string("input_47_cast_fp16")]; + tensor model_model_layers_3_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267091008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269188224))))[name = string("model_model_layers_3_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_3_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_3_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_47_cast_fp16)[name = string("linear_3_cast_fp16")]; + tensor hidden_states_29_cast_fp16 = add(x = hidden_states_25_cast_fp16, y = linear_3_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; + tensor mean_15_axes_0 = const()[name = string("mean_15_axes_0"), val = tensor([-1])]; + bool mean_15_keep_dims_0 = const()[name = string("mean_15_keep_dims_0"), val = bool(true)]; + tensor mean_15_cast_fp16 = reduce_mean(axes = mean_15_axes_0, keep_dims = mean_15_keep_dims_0, x = hidden_states_29_cast_fp16)[name = string("mean_15_cast_fp16")]; + tensor input_49_cast_fp16 = sub(x = hidden_states_29_cast_fp16, y = mean_15_cast_fp16)[name = string("input_49_cast_fp16")]; + tensor var_973_axes_0 = const()[name = string("op_973_axes_0"), val = tensor([-1])]; + tensor model_model_layers_3_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_3_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269196480)))]; + tensor var_973_cast_fp16 = layer_norm(axes = var_973_axes_0, epsilon = var_45_to_fp16, gamma = model_model_layers_3_post_attention_layernorm_weight_to_fp16, x = input_49_cast_fp16)[name = string("op_973_cast_fp16")]; + tensor var_980 = const()[name = string("op_980"), val = tensor([0, 2, 1])]; + tensor input_51_axes_0 = const()[name = string("input_51_axes_0"), val = tensor([2])]; + tensor var_981 = transpose(perm = var_980, x = var_973_cast_fp16)[name = string("transpose_17")]; + tensor input_51 = expand_dims(axes = input_51_axes_0, x = var_981)[name = string("input_51")]; + string input_53_pad_type_0 = const()[name = string("input_53_pad_type_0"), val = string("valid")]; + tensor input_53_strides_0 = const()[name = string("input_53_strides_0"), val = tensor([1, 1])]; + tensor input_53_pad_0 = const()[name = string("input_53_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_53_dilations_0 = const()[name = string("input_53_dilations_0"), val = tensor([1, 1])]; + int32 input_53_groups_0 = const()[name = string("input_53_groups_0"), val = int32(1)]; + tensor input_53 = conv(dilations = input_53_dilations_0, groups = input_53_groups_0, pad = input_53_pad_0, pad_type = input_53_pad_type_0, strides = input_53_strides_0, weight = model_model_layers_3_mlp_gate_proj_weight_palettized, x = input_51)[name = string("input_53")]; + string up_states_7_pad_type_0 = const()[name = string("up_states_7_pad_type_0"), val = string("valid")]; + tensor up_states_7_strides_0 = const()[name = string("up_states_7_strides_0"), val = tensor([1, 1])]; + tensor up_states_7_pad_0 = const()[name = string("up_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_7_dilations_0 = const()[name = string("up_states_7_dilations_0"), val = tensor([1, 1])]; + int32 up_states_7_groups_0 = const()[name = string("up_states_7_groups_0"), val = int32(1)]; + tensor up_states_7 = conv(dilations = up_states_7_dilations_0, groups = up_states_7_groups_0, pad = up_states_7_pad_0, pad_type = up_states_7_pad_type_0, strides = up_states_7_strides_0, weight = model_model_layers_3_mlp_up_proj_weight_palettized, x = input_51)[name = string("up_states_7")]; + tensor gate_states_7 = silu(x = input_53)[name = string("gate_states_7")]; + tensor input_55 = mul(x = gate_states_7, y = up_states_7)[name = string("input_55")]; + string hidden_states_31_pad_type_0 = const()[name = string("hidden_states_31_pad_type_0"), val = string("valid")]; + tensor hidden_states_31_strides_0 = const()[name = string("hidden_states_31_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_31_pad_0 = const()[name = string("hidden_states_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_31_dilations_0 = const()[name = string("hidden_states_31_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_31_groups_0 = const()[name = string("hidden_states_31_groups_0"), val = int32(1)]; + tensor hidden_states_31 = conv(dilations = hidden_states_31_dilations_0, groups = hidden_states_31_groups_0, pad = hidden_states_31_pad_0, pad_type = hidden_states_31_pad_type_0, strides = hidden_states_31_strides_0, weight = model_model_layers_3_mlp_down_proj_weight_palettized, x = input_55)[name = string("hidden_states_31")]; + tensor var_1003_axes_0 = const()[name = string("op_1003_axes_0"), val = tensor([2])]; + tensor var_1003 = squeeze(axes = var_1003_axes_0, x = hidden_states_31)[name = string("op_1003")]; + tensor var_1004 = const()[name = string("op_1004"), val = tensor([0, 2, 1])]; + tensor var_1005 = transpose(perm = var_1004, x = var_1003)[name = string("transpose_16")]; + tensor hidden_states_33_cast_fp16 = add(x = hidden_states_29_cast_fp16, y = var_1005)[name = string("hidden_states_33_cast_fp16")]; + tensor mean_17_axes_0 = const()[name = string("mean_17_axes_0"), val = tensor([-1])]; + bool mean_17_keep_dims_0 = const()[name = string("mean_17_keep_dims_0"), val = bool(true)]; + tensor mean_17_cast_fp16 = reduce_mean(axes = mean_17_axes_0, keep_dims = mean_17_keep_dims_0, x = hidden_states_33_cast_fp16)[name = string("mean_17_cast_fp16")]; + tensor input_57_cast_fp16 = sub(x = hidden_states_33_cast_fp16, y = mean_17_cast_fp16)[name = string("input_57_cast_fp16")]; + tensor var_1013_axes_0 = const()[name = string("op_1013_axes_0"), val = tensor([-1])]; + tensor model_model_layers_4_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_4_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269200640)))]; + tensor var_1013_cast_fp16 = layer_norm(axes = var_1013_axes_0, epsilon = var_45_to_fp16, gamma = model_model_layers_4_input_layernorm_weight_to_fp16, x = input_57_cast_fp16)[name = string("op_1013_cast_fp16")]; + tensor var_1016 = const()[name = string("op_1016"), val = tensor([0, 2, 1])]; + tensor var_1018_axes_0 = const()[name = string("op_1018_axes_0"), val = tensor([2])]; + tensor var_1017 = transpose(perm = var_1016, x = var_1013_cast_fp16)[name = string("transpose_15")]; + tensor var_1018 = expand_dims(axes = var_1018_axes_0, x = var_1017)[name = string("op_1018")]; + string var_1025_pad_type_0 = const()[name = string("op_1025_pad_type_0"), val = string("valid")]; + tensor var_1025_strides_0 = const()[name = string("op_1025_strides_0"), val = tensor([1, 1])]; + tensor var_1025_pad_0 = const()[name = string("op_1025_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1025_dilations_0 = const()[name = string("op_1025_dilations_0"), val = tensor([1, 1])]; + int32 var_1025_groups_0 = const()[name = string("op_1025_groups_0"), val = int32(1)]; + tensor var_1025 = conv(dilations = var_1025_dilations_0, groups = var_1025_groups_0, pad = var_1025_pad_0, pad_type = var_1025_pad_type_0, strides = var_1025_strides_0, weight = model_model_layers_4_self_attn_q_proj_weight_palettized, x = var_1018)[name = string("op_1025")]; + tensor var_1026 = const()[name = string("op_1026"), val = tensor([1, 32, 1, 64])]; + tensor var_1027 = reshape(shape = var_1026, x = var_1025)[name = string("op_1027")]; + string var_1034_pad_type_0 = const()[name = string("op_1034_pad_type_0"), val = string("valid")]; + tensor var_1034_strides_0 = const()[name = string("op_1034_strides_0"), val = tensor([1, 1])]; + tensor var_1034_pad_0 = const()[name = string("op_1034_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1034_dilations_0 = const()[name = string("op_1034_dilations_0"), val = tensor([1, 1])]; + int32 var_1034_groups_0 = const()[name = string("op_1034_groups_0"), val = int32(1)]; + tensor var_1034 = conv(dilations = var_1034_dilations_0, groups = var_1034_groups_0, pad = var_1034_pad_0, pad_type = var_1034_pad_type_0, strides = var_1034_strides_0, weight = model_model_layers_4_self_attn_k_proj_weight_palettized, x = var_1018)[name = string("op_1034")]; + tensor var_1035 = const()[name = string("op_1035"), val = tensor([1, 8, 1, 64])]; + tensor var_1036 = reshape(shape = var_1035, x = var_1034)[name = string("op_1036")]; + string var_1043_pad_type_0 = const()[name = string("op_1043_pad_type_0"), val = string("valid")]; + tensor var_1043_strides_0 = const()[name = string("op_1043_strides_0"), val = tensor([1, 1])]; + tensor var_1043_pad_0 = const()[name = string("op_1043_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1043_dilations_0 = const()[name = string("op_1043_dilations_0"), val = tensor([1, 1])]; + int32 var_1043_groups_0 = const()[name = string("op_1043_groups_0"), val = int32(1)]; + tensor var_1043 = conv(dilations = var_1043_dilations_0, groups = var_1043_groups_0, pad = var_1043_pad_0, pad_type = var_1043_pad_type_0, strides = var_1043_strides_0, weight = model_model_layers_4_self_attn_v_proj_weight_palettized, x = var_1018)[name = string("op_1043")]; + tensor var_1044 = const()[name = string("op_1044"), val = tensor([1, 8, 1, 64])]; + tensor var_1045 = reshape(shape = var_1044, x = var_1043)[name = string("op_1045")]; + tensor x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor([1, 32, 1, 32])]; + tensor x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_17 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = var_1027)[name = string("x1_17")]; + tensor x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor([1, 32, 1, 64])]; + tensor x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_17 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = var_1027)[name = string("x2_17")]; + tensor var_1059_cast_fp16 = mul(x = x1_17, y = cos_3_cast_fp16)[name = string("op_1059_cast_fp16")]; + tensor var_1060_cast_fp16 = mul(x = x2_17, y = sin_3_cast_fp16)[name = string("op_1060_cast_fp16")]; + tensor var_1061_cast_fp16 = sub(x = var_1059_cast_fp16, y = var_1060_cast_fp16)[name = string("op_1061_cast_fp16")]; + tensor var_1062_cast_fp16 = mul(x = x2_17, y = cos_3_cast_fp16)[name = string("op_1062_cast_fp16")]; + tensor var_1063_cast_fp16 = mul(x = x1_17, y = sin_3_cast_fp16)[name = string("op_1063_cast_fp16")]; + tensor var_1064_cast_fp16 = add(x = var_1062_cast_fp16, y = var_1063_cast_fp16)[name = string("op_1064_cast_fp16")]; + bool rotated_17_interleave_0 = const()[name = string("rotated_17_interleave_0"), val = bool(false)]; + tensor rotated_17_cast_fp16 = concat(axis = var_50, interleave = rotated_17_interleave_0, values = (var_1061_cast_fp16, var_1064_cast_fp16))[name = string("rotated_17_cast_fp16")]; + tensor x1_19_begin_0 = const()[name = string("x1_19_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_19_end_0 = const()[name = string("x1_19_end_0"), val = tensor([1, 8, 1, 32])]; + tensor x1_19_end_mask_0 = const()[name = string("x1_19_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_19 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = var_1036)[name = string("x1_19")]; + tensor x2_19_begin_0 = const()[name = string("x2_19_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_19_end_0 = const()[name = string("x2_19_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x2_19_end_mask_0 = const()[name = string("x2_19_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_19 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = var_1036)[name = string("x2_19")]; + tensor var_1080_cast_fp16 = mul(x = x1_19, y = cos_3_cast_fp16)[name = string("op_1080_cast_fp16")]; + tensor var_1081_cast_fp16 = mul(x = x2_19, y = sin_3_cast_fp16)[name = string("op_1081_cast_fp16")]; + tensor var_1082_cast_fp16 = sub(x = var_1080_cast_fp16, y = var_1081_cast_fp16)[name = string("op_1082_cast_fp16")]; + tensor var_1083_cast_fp16 = mul(x = x2_19, y = cos_3_cast_fp16)[name = string("op_1083_cast_fp16")]; + tensor var_1084_cast_fp16 = mul(x = x1_19, y = sin_3_cast_fp16)[name = string("op_1084_cast_fp16")]; + tensor var_1085_cast_fp16 = add(x = var_1083_cast_fp16, y = var_1084_cast_fp16)[name = string("op_1085_cast_fp16")]; + bool rotated_19_interleave_0 = const()[name = string("rotated_19_interleave_0"), val = bool(false)]; + tensor rotated_19_cast_fp16 = concat(axis = var_50, interleave = rotated_19_interleave_0, values = (var_1082_cast_fp16, var_1085_cast_fp16))[name = string("rotated_19_cast_fp16")]; + tensor expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor([4])]; + tensor expand_dims_49 = const()[name = string("expand_dims_49"), val = tensor([0])]; + tensor expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor([0])]; + tensor expand_dims_52 = const()[name = string("expand_dims_52"), val = tensor([5])]; + int32 concat_34_axis_0 = const()[name = string("concat_34_axis_0"), val = int32(0)]; + bool concat_34_interleave_0 = const()[name = string("concat_34_interleave_0"), val = bool(false)]; + tensor concat_34 = concat(axis = concat_34_axis_0, interleave = concat_34_interleave_0, values = (expand_dims_48, expand_dims_49, current_pos, expand_dims_51))[name = string("concat_34")]; + tensor concat_35_values1_0 = const()[name = string("concat_35_values1_0"), val = tensor([0])]; + tensor concat_35_values3_0 = const()[name = string("concat_35_values3_0"), val = tensor([0])]; + int32 concat_35_axis_0 = const()[name = string("concat_35_axis_0"), val = int32(0)]; + bool concat_35_interleave_0 = const()[name = string("concat_35_interleave_0"), val = bool(false)]; + tensor concat_35 = concat(axis = concat_35_axis_0, interleave = concat_35_interleave_0, values = (expand_dims_52, concat_35_values1_0, var_350, concat_35_values3_0))[name = string("concat_35")]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_34, begin_mask = model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0, end = concat_35, end_mask = model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_9_stride_0, update = rotated_19_cast_fp16, x = coreml_update_state_23)[name = string("model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_8_write_state")]; + tensor coreml_update_state_24 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_8")]; + tensor expand_dims_54 = const()[name = string("expand_dims_54"), val = tensor([20])]; + tensor expand_dims_55 = const()[name = string("expand_dims_55"), val = tensor([0])]; + tensor expand_dims_57 = const()[name = string("expand_dims_57"), val = tensor([0])]; + tensor expand_dims_58 = const()[name = string("expand_dims_58"), val = tensor([21])]; + int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)]; + bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)]; + tensor concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (expand_dims_54, expand_dims_55, current_pos, expand_dims_57))[name = string("concat_38")]; + tensor concat_39_values1_0 = const()[name = string("concat_39_values1_0"), val = tensor([0])]; + tensor concat_39_values3_0 = const()[name = string("concat_39_values3_0"), val = tensor([0])]; + int32 concat_39_axis_0 = const()[name = string("concat_39_axis_0"), val = int32(0)]; + bool concat_39_interleave_0 = const()[name = string("concat_39_interleave_0"), val = bool(false)]; + tensor concat_39 = concat(axis = concat_39_axis_0, interleave = concat_39_interleave_0, values = (expand_dims_58, concat_39_values1_0, var_350, concat_39_values3_0))[name = string("concat_39")]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_38, begin_mask = model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0, end = concat_39, end_mask = model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_10_stride_0, update = var_1045, x = coreml_update_state_24)[name = string("model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_9_write_state")]; + tensor coreml_update_state_25 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_9")]; + tensor var_1105_begin_0 = const()[name = string("op_1105_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor var_1105_end_0 = const()[name = string("op_1105_end_0"), val = tensor([5, 8, 1546, 64])]; + tensor var_1105_end_mask_0 = const()[name = string("op_1105_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1105_cast_fp16 = slice_by_index(begin = var_1105_begin_0, end = var_1105_end_0, end_mask = var_1105_end_mask_0, x = coreml_update_state_25)[name = string("op_1105_cast_fp16")]; + tensor K_layer_cache_9_axes_0 = const()[name = string("K_layer_cache_9_axes_0"), val = tensor([0])]; + tensor K_layer_cache_9_cast_fp16 = squeeze(axes = K_layer_cache_9_axes_0, x = var_1105_cast_fp16)[name = string("K_layer_cache_9_cast_fp16")]; + tensor var_1107_begin_0 = const()[name = string("op_1107_begin_0"), val = tensor([20, 0, 0, 0])]; + tensor var_1107_end_0 = const()[name = string("op_1107_end_0"), val = tensor([21, 8, 1546, 64])]; + tensor var_1107_end_mask_0 = const()[name = string("op_1107_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1107_cast_fp16 = slice_by_index(begin = var_1107_begin_0, end = var_1107_end_0, end_mask = var_1107_end_mask_0, x = coreml_update_state_25)[name = string("op_1107_cast_fp16")]; + tensor V_layer_cache_9_axes_0 = const()[name = string("V_layer_cache_9_axes_0"), val = tensor([0])]; + tensor V_layer_cache_9_cast_fp16 = squeeze(axes = V_layer_cache_9_axes_0, x = var_1107_cast_fp16)[name = string("V_layer_cache_9_cast_fp16")]; + tensor x_123_axes_0 = const()[name = string("x_123_axes_0"), val = tensor([1])]; + tensor x_123_cast_fp16 = expand_dims(axes = x_123_axes_0, x = K_layer_cache_9_cast_fp16)[name = string("x_123_cast_fp16")]; + tensor var_1116 = const()[name = string("op_1116"), val = tensor([1, 4, 1, 1])]; + tensor x_125_cast_fp16 = tile(reps = var_1116, x = x_123_cast_fp16)[name = string("x_125_cast_fp16")]; + tensor var_1120 = const()[name = string("op_1120"), val = tensor([1, -1, 1546, 64])]; + tensor key_states_19_cast_fp16 = reshape(shape = var_1120, x = x_125_cast_fp16)[name = string("key_states_19_cast_fp16")]; + tensor x_129_axes_0 = const()[name = string("x_129_axes_0"), val = tensor([1])]; + tensor x_129_cast_fp16 = expand_dims(axes = x_129_axes_0, x = V_layer_cache_9_cast_fp16)[name = string("x_129_cast_fp16")]; + tensor var_1123 = const()[name = string("op_1123"), val = tensor([1, 4, 1, 1])]; + tensor x_131_cast_fp16 = tile(reps = var_1123, x = x_129_cast_fp16)[name = string("x_131_cast_fp16")]; + tensor var_1127 = const()[name = string("op_1127"), val = tensor([1, -1, 1546, 64])]; + tensor value_states_19_cast_fp16 = reshape(shape = var_1127, x = x_131_cast_fp16)[name = string("value_states_19_cast_fp16")]; + bool var_1130_transpose_x_1 = const()[name = string("op_1130_transpose_x_1"), val = bool(false)]; + bool var_1130_transpose_y_1 = const()[name = string("op_1130_transpose_y_1"), val = bool(true)]; + tensor var_1130_cast_fp16 = matmul(transpose_x = var_1130_transpose_x_1, transpose_y = var_1130_transpose_y_1, x = rotated_17_cast_fp16, y = key_states_19_cast_fp16)[name = string("op_1130_cast_fp16")]; + fp16 var_1131_to_fp16 = const()[name = string("op_1131_to_fp16"), val = fp16(0x1p-3)]; + tensor attn_weights_17_cast_fp16 = mul(x = var_1130_cast_fp16, y = var_1131_to_fp16)[name = string("attn_weights_17_cast_fp16")]; + tensor x_133_cast_fp16 = add(x = attn_weights_17_cast_fp16, y = causal_mask)[name = string("x_133_cast_fp16")]; + tensor reduce_max_4_axes_0 = const()[name = string("reduce_max_4_axes_0"), val = tensor([-1])]; + bool reduce_max_4_keep_dims_0 = const()[name = string("reduce_max_4_keep_dims_0"), val = bool(true)]; + tensor reduce_max_4_cast_fp16 = reduce_max(axes = reduce_max_4_axes_0, keep_dims = reduce_max_4_keep_dims_0, x = x_133_cast_fp16)[name = string("reduce_max_4_cast_fp16")]; + tensor x_135_cast_fp16 = sub(x = x_133_cast_fp16, y = reduce_max_4_cast_fp16)[name = string("x_135_cast_fp16")]; + tensor exp_x_9_cast_fp16 = exp(x = x_135_cast_fp16)[name = string("exp_x_9_cast_fp16")]; + tensor var_1142_axes_0 = const()[name = string("op_1142_axes_0"), val = tensor([-1])]; + bool var_1142_keep_dims_0 = const()[name = string("op_1142_keep_dims_0"), val = bool(true)]; + tensor var_1142_cast_fp16 = reduce_sum(axes = var_1142_axes_0, keep_dims = var_1142_keep_dims_0, x = exp_x_9_cast_fp16)[name = string("op_1142_cast_fp16")]; + tensor attn_weights_19_cast_fp16 = real_div(x = exp_x_9_cast_fp16, y = var_1142_cast_fp16)[name = string("attn_weights_19_cast_fp16")]; + bool attn_output_25_transpose_x_0 = const()[name = string("attn_output_25_transpose_x_0"), val = bool(false)]; + bool attn_output_25_transpose_y_0 = const()[name = string("attn_output_25_transpose_y_0"), val = bool(false)]; + tensor attn_output_25_cast_fp16 = matmul(transpose_x = attn_output_25_transpose_x_0, transpose_y = attn_output_25_transpose_y_0, x = attn_weights_19_cast_fp16, y = value_states_19_cast_fp16)[name = string("attn_output_25_cast_fp16")]; + tensor var_1145_perm_0 = const()[name = string("op_1145_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1147 = const()[name = string("op_1147"), val = tensor([1, 1, 2048])]; + tensor var_1145_cast_fp16 = transpose(perm = var_1145_perm_0, x = attn_output_25_cast_fp16)[name = string("transpose_14")]; + tensor input_61_cast_fp16 = reshape(shape = var_1147, x = var_1145_cast_fp16)[name = string("input_61_cast_fp16")]; + tensor model_model_layers_4_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269204800))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271302016))))[name = string("model_model_layers_4_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_4_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_4_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_61_cast_fp16)[name = string("linear_4_cast_fp16")]; + tensor hidden_states_37_cast_fp16 = add(x = hidden_states_33_cast_fp16, y = linear_4_cast_fp16)[name = string("hidden_states_37_cast_fp16")]; + tensor mean_19_axes_0 = const()[name = string("mean_19_axes_0"), val = tensor([-1])]; + bool mean_19_keep_dims_0 = const()[name = string("mean_19_keep_dims_0"), val = bool(true)]; + tensor mean_19_cast_fp16 = reduce_mean(axes = mean_19_axes_0, keep_dims = mean_19_keep_dims_0, x = hidden_states_37_cast_fp16)[name = string("mean_19_cast_fp16")]; + tensor input_63_cast_fp16 = sub(x = hidden_states_37_cast_fp16, y = mean_19_cast_fp16)[name = string("input_63_cast_fp16")]; + tensor var_1158_axes_0 = const()[name = string("op_1158_axes_0"), val = tensor([-1])]; + tensor model_model_layers_4_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_4_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271310272)))]; + tensor var_1158_cast_fp16 = layer_norm(axes = var_1158_axes_0, epsilon = var_45_to_fp16, gamma = model_model_layers_4_post_attention_layernorm_weight_to_fp16, x = input_63_cast_fp16)[name = string("op_1158_cast_fp16")]; + tensor var_1165 = const()[name = string("op_1165"), val = tensor([0, 2, 1])]; + tensor input_65_axes_0 = const()[name = string("input_65_axes_0"), val = tensor([2])]; + tensor var_1166 = transpose(perm = var_1165, x = var_1158_cast_fp16)[name = string("transpose_13")]; + tensor input_65 = expand_dims(axes = input_65_axes_0, x = var_1166)[name = string("input_65")]; + string input_67_pad_type_0 = const()[name = string("input_67_pad_type_0"), val = string("valid")]; + tensor input_67_strides_0 = const()[name = string("input_67_strides_0"), val = tensor([1, 1])]; + tensor input_67_pad_0 = const()[name = string("input_67_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_67_dilations_0 = const()[name = string("input_67_dilations_0"), val = tensor([1, 1])]; + int32 input_67_groups_0 = const()[name = string("input_67_groups_0"), val = int32(1)]; + tensor input_67 = conv(dilations = input_67_dilations_0, groups = input_67_groups_0, pad = input_67_pad_0, pad_type = input_67_pad_type_0, strides = input_67_strides_0, weight = model_model_layers_4_mlp_gate_proj_weight_palettized, x = input_65)[name = string("input_67")]; + string up_states_9_pad_type_0 = const()[name = string("up_states_9_pad_type_0"), val = string("valid")]; + tensor up_states_9_strides_0 = const()[name = string("up_states_9_strides_0"), val = tensor([1, 1])]; + tensor up_states_9_pad_0 = const()[name = string("up_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_9_dilations_0 = const()[name = string("up_states_9_dilations_0"), val = tensor([1, 1])]; + int32 up_states_9_groups_0 = const()[name = string("up_states_9_groups_0"), val = int32(1)]; + tensor up_states_9 = conv(dilations = up_states_9_dilations_0, groups = up_states_9_groups_0, pad = up_states_9_pad_0, pad_type = up_states_9_pad_type_0, strides = up_states_9_strides_0, weight = model_model_layers_4_mlp_up_proj_weight_palettized, x = input_65)[name = string("up_states_9")]; + tensor gate_states_9 = silu(x = input_67)[name = string("gate_states_9")]; + tensor input_69 = mul(x = gate_states_9, y = up_states_9)[name = string("input_69")]; + string hidden_states_39_pad_type_0 = const()[name = string("hidden_states_39_pad_type_0"), val = string("valid")]; + tensor hidden_states_39_strides_0 = const()[name = string("hidden_states_39_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_39_pad_0 = const()[name = string("hidden_states_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_39_dilations_0 = const()[name = string("hidden_states_39_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_39_groups_0 = const()[name = string("hidden_states_39_groups_0"), val = int32(1)]; + tensor hidden_states_39 = conv(dilations = hidden_states_39_dilations_0, groups = hidden_states_39_groups_0, pad = hidden_states_39_pad_0, pad_type = hidden_states_39_pad_type_0, strides = hidden_states_39_strides_0, weight = model_model_layers_4_mlp_down_proj_weight_palettized, x = input_69)[name = string("hidden_states_39")]; + tensor var_1188_axes_0 = const()[name = string("op_1188_axes_0"), val = tensor([2])]; + tensor var_1188 = squeeze(axes = var_1188_axes_0, x = hidden_states_39)[name = string("op_1188")]; + tensor var_1189 = const()[name = string("op_1189"), val = tensor([0, 2, 1])]; + tensor var_1190 = transpose(perm = var_1189, x = var_1188)[name = string("transpose_12")]; + tensor hidden_states_41_cast_fp16 = add(x = hidden_states_37_cast_fp16, y = var_1190)[name = string("hidden_states_41_cast_fp16")]; + tensor mean_21_axes_0 = const()[name = string("mean_21_axes_0"), val = tensor([-1])]; + bool mean_21_keep_dims_0 = const()[name = string("mean_21_keep_dims_0"), val = bool(true)]; + tensor mean_21_cast_fp16 = reduce_mean(axes = mean_21_axes_0, keep_dims = mean_21_keep_dims_0, x = hidden_states_41_cast_fp16)[name = string("mean_21_cast_fp16")]; + tensor input_71_cast_fp16 = sub(x = hidden_states_41_cast_fp16, y = mean_21_cast_fp16)[name = string("input_71_cast_fp16")]; + tensor var_1198_axes_0 = const()[name = string("op_1198_axes_0"), val = tensor([-1])]; + tensor model_model_layers_5_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_5_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271314432)))]; + tensor var_1198_cast_fp16 = layer_norm(axes = var_1198_axes_0, epsilon = var_45_to_fp16, gamma = model_model_layers_5_input_layernorm_weight_to_fp16, x = input_71_cast_fp16)[name = string("op_1198_cast_fp16")]; + tensor var_1201 = const()[name = string("op_1201"), val = tensor([0, 2, 1])]; + tensor var_1203_axes_0 = const()[name = string("op_1203_axes_0"), val = tensor([2])]; + tensor var_1202 = transpose(perm = var_1201, x = var_1198_cast_fp16)[name = string("transpose_11")]; + tensor var_1203 = expand_dims(axes = var_1203_axes_0, x = var_1202)[name = string("op_1203")]; + string var_1210_pad_type_0 = const()[name = string("op_1210_pad_type_0"), val = string("valid")]; + tensor var_1210_strides_0 = const()[name = string("op_1210_strides_0"), val = tensor([1, 1])]; + tensor var_1210_pad_0 = const()[name = string("op_1210_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1210_dilations_0 = const()[name = string("op_1210_dilations_0"), val = tensor([1, 1])]; + int32 var_1210_groups_0 = const()[name = string("op_1210_groups_0"), val = int32(1)]; + tensor var_1210 = conv(dilations = var_1210_dilations_0, groups = var_1210_groups_0, pad = var_1210_pad_0, pad_type = var_1210_pad_type_0, strides = var_1210_strides_0, weight = model_model_layers_5_self_attn_q_proj_weight_palettized, x = var_1203)[name = string("op_1210")]; + tensor var_1211 = const()[name = string("op_1211"), val = tensor([1, 32, 1, 64])]; + tensor var_1212 = reshape(shape = var_1211, x = var_1210)[name = string("op_1212")]; + string var_1219_pad_type_0 = const()[name = string("op_1219_pad_type_0"), val = string("valid")]; + tensor var_1219_strides_0 = const()[name = string("op_1219_strides_0"), val = tensor([1, 1])]; + tensor var_1219_pad_0 = const()[name = string("op_1219_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1219_dilations_0 = const()[name = string("op_1219_dilations_0"), val = tensor([1, 1])]; + int32 var_1219_groups_0 = const()[name = string("op_1219_groups_0"), val = int32(1)]; + tensor var_1219 = conv(dilations = var_1219_dilations_0, groups = var_1219_groups_0, pad = var_1219_pad_0, pad_type = var_1219_pad_type_0, strides = var_1219_strides_0, weight = model_model_layers_5_self_attn_k_proj_weight_palettized, x = var_1203)[name = string("op_1219")]; + tensor var_1220 = const()[name = string("op_1220"), val = tensor([1, 8, 1, 64])]; + tensor var_1221 = reshape(shape = var_1220, x = var_1219)[name = string("op_1221")]; + string var_1228_pad_type_0 = const()[name = string("op_1228_pad_type_0"), val = string("valid")]; + tensor var_1228_strides_0 = const()[name = string("op_1228_strides_0"), val = tensor([1, 1])]; + tensor var_1228_pad_0 = const()[name = string("op_1228_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1228_dilations_0 = const()[name = string("op_1228_dilations_0"), val = tensor([1, 1])]; + int32 var_1228_groups_0 = const()[name = string("op_1228_groups_0"), val = int32(1)]; + tensor var_1228 = conv(dilations = var_1228_dilations_0, groups = var_1228_groups_0, pad = var_1228_pad_0, pad_type = var_1228_pad_type_0, strides = var_1228_strides_0, weight = model_model_layers_5_self_attn_v_proj_weight_palettized, x = var_1203)[name = string("op_1228")]; + tensor var_1229 = const()[name = string("op_1229"), val = tensor([1, 8, 1, 64])]; + tensor var_1230 = reshape(shape = var_1229, x = var_1228)[name = string("op_1230")]; + tensor x1_21_begin_0 = const()[name = string("x1_21_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_21_end_0 = const()[name = string("x1_21_end_0"), val = tensor([1, 32, 1, 32])]; + tensor x1_21_end_mask_0 = const()[name = string("x1_21_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_21 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = var_1212)[name = string("x1_21")]; + tensor x2_21_begin_0 = const()[name = string("x2_21_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_21_end_0 = const()[name = string("x2_21_end_0"), val = tensor([1, 32, 1, 64])]; + tensor x2_21_end_mask_0 = const()[name = string("x2_21_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_21 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = var_1212)[name = string("x2_21")]; + tensor var_1244_cast_fp16 = mul(x = x1_21, y = cos_3_cast_fp16)[name = string("op_1244_cast_fp16")]; + tensor var_1245_cast_fp16 = mul(x = x2_21, y = sin_3_cast_fp16)[name = string("op_1245_cast_fp16")]; + tensor var_1246_cast_fp16 = sub(x = var_1244_cast_fp16, y = var_1245_cast_fp16)[name = string("op_1246_cast_fp16")]; + tensor var_1247_cast_fp16 = mul(x = x2_21, y = cos_3_cast_fp16)[name = string("op_1247_cast_fp16")]; + tensor var_1248_cast_fp16 = mul(x = x1_21, y = sin_3_cast_fp16)[name = string("op_1248_cast_fp16")]; + tensor var_1249_cast_fp16 = add(x = var_1247_cast_fp16, y = var_1248_cast_fp16)[name = string("op_1249_cast_fp16")]; + bool rotated_21_interleave_0 = const()[name = string("rotated_21_interleave_0"), val = bool(false)]; + tensor rotated_21_cast_fp16 = concat(axis = var_50, interleave = rotated_21_interleave_0, values = (var_1246_cast_fp16, var_1249_cast_fp16))[name = string("rotated_21_cast_fp16")]; + tensor x1_23_begin_0 = const()[name = string("x1_23_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_23_end_0 = const()[name = string("x1_23_end_0"), val = tensor([1, 8, 1, 32])]; + tensor x1_23_end_mask_0 = const()[name = string("x1_23_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_23 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = var_1221)[name = string("x1_23")]; + tensor x2_23_begin_0 = const()[name = string("x2_23_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_23_end_0 = const()[name = string("x2_23_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x2_23_end_mask_0 = const()[name = string("x2_23_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_23 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = var_1221)[name = string("x2_23")]; + tensor var_1265_cast_fp16 = mul(x = x1_23, y = cos_3_cast_fp16)[name = string("op_1265_cast_fp16")]; + tensor var_1266_cast_fp16 = mul(x = x2_23, y = sin_3_cast_fp16)[name = string("op_1266_cast_fp16")]; + tensor var_1267_cast_fp16 = sub(x = var_1265_cast_fp16, y = var_1266_cast_fp16)[name = string("op_1267_cast_fp16")]; + tensor var_1268_cast_fp16 = mul(x = x2_23, y = cos_3_cast_fp16)[name = string("op_1268_cast_fp16")]; + tensor var_1269_cast_fp16 = mul(x = x1_23, y = sin_3_cast_fp16)[name = string("op_1269_cast_fp16")]; + tensor var_1270_cast_fp16 = add(x = var_1268_cast_fp16, y = var_1269_cast_fp16)[name = string("op_1270_cast_fp16")]; + bool rotated_23_interleave_0 = const()[name = string("rotated_23_interleave_0"), val = bool(false)]; + tensor rotated_23_cast_fp16 = concat(axis = var_50, interleave = rotated_23_interleave_0, values = (var_1267_cast_fp16, var_1270_cast_fp16))[name = string("rotated_23_cast_fp16")]; + tensor expand_dims_60 = const()[name = string("expand_dims_60"), val = tensor([5])]; + tensor expand_dims_61 = const()[name = string("expand_dims_61"), val = tensor([0])]; + tensor expand_dims_63 = const()[name = string("expand_dims_63"), val = tensor([0])]; + tensor expand_dims_64 = const()[name = string("expand_dims_64"), val = tensor([6])]; + int32 concat_42_axis_0 = const()[name = string("concat_42_axis_0"), val = int32(0)]; + bool concat_42_interleave_0 = const()[name = string("concat_42_interleave_0"), val = bool(false)]; + tensor concat_42 = concat(axis = concat_42_axis_0, interleave = concat_42_interleave_0, values = (expand_dims_60, expand_dims_61, current_pos, expand_dims_63))[name = string("concat_42")]; + tensor concat_43_values1_0 = const()[name = string("concat_43_values1_0"), val = tensor([0])]; + tensor concat_43_values3_0 = const()[name = string("concat_43_values3_0"), val = tensor([0])]; + int32 concat_43_axis_0 = const()[name = string("concat_43_axis_0"), val = int32(0)]; + bool concat_43_interleave_0 = const()[name = string("concat_43_interleave_0"), val = bool(false)]; + tensor concat_43 = concat(axis = concat_43_axis_0, interleave = concat_43_interleave_0, values = (expand_dims_64, concat_43_values1_0, var_350, concat_43_values3_0))[name = string("concat_43")]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_42, begin_mask = model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0, end = concat_43, end_mask = model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_11_stride_0, update = rotated_23_cast_fp16, x = coreml_update_state_25)[name = string("model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_10_write_state")]; + tensor coreml_update_state_26 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_10")]; + tensor expand_dims_66 = const()[name = string("expand_dims_66"), val = tensor([21])]; + tensor expand_dims_67 = const()[name = string("expand_dims_67"), val = tensor([0])]; + tensor expand_dims_69 = const()[name = string("expand_dims_69"), val = tensor([0])]; + tensor expand_dims_70 = const()[name = string("expand_dims_70"), val = tensor([22])]; + int32 concat_46_axis_0 = const()[name = string("concat_46_axis_0"), val = int32(0)]; + bool concat_46_interleave_0 = const()[name = string("concat_46_interleave_0"), val = bool(false)]; + tensor concat_46 = concat(axis = concat_46_axis_0, interleave = concat_46_interleave_0, values = (expand_dims_66, expand_dims_67, current_pos, expand_dims_69))[name = string("concat_46")]; + tensor concat_47_values1_0 = const()[name = string("concat_47_values1_0"), val = tensor([0])]; + tensor concat_47_values3_0 = const()[name = string("concat_47_values3_0"), val = tensor([0])]; + int32 concat_47_axis_0 = const()[name = string("concat_47_axis_0"), val = int32(0)]; + bool concat_47_interleave_0 = const()[name = string("concat_47_interleave_0"), val = bool(false)]; + tensor concat_47 = concat(axis = concat_47_axis_0, interleave = concat_47_interleave_0, values = (expand_dims_70, concat_47_values1_0, var_350, concat_47_values3_0))[name = string("concat_47")]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_46, begin_mask = model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0, end = concat_47, end_mask = model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_12_stride_0, update = var_1230, x = coreml_update_state_26)[name = string("model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_11_write_state")]; + tensor coreml_update_state_27 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_11")]; + tensor var_1290_begin_0 = const()[name = string("op_1290_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor var_1290_end_0 = const()[name = string("op_1290_end_0"), val = tensor([6, 8, 1546, 64])]; + tensor var_1290_end_mask_0 = const()[name = string("op_1290_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1290_cast_fp16 = slice_by_index(begin = var_1290_begin_0, end = var_1290_end_0, end_mask = var_1290_end_mask_0, x = coreml_update_state_27)[name = string("op_1290_cast_fp16")]; + tensor K_layer_cache_11_axes_0 = const()[name = string("K_layer_cache_11_axes_0"), val = tensor([0])]; + tensor K_layer_cache_11_cast_fp16 = squeeze(axes = K_layer_cache_11_axes_0, x = var_1290_cast_fp16)[name = string("K_layer_cache_11_cast_fp16")]; + tensor var_1292_begin_0 = const()[name = string("op_1292_begin_0"), val = tensor([21, 0, 0, 0])]; + tensor var_1292_end_0 = const()[name = string("op_1292_end_0"), val = tensor([22, 8, 1546, 64])]; + tensor var_1292_end_mask_0 = const()[name = string("op_1292_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1292_cast_fp16 = slice_by_index(begin = var_1292_begin_0, end = var_1292_end_0, end_mask = var_1292_end_mask_0, x = coreml_update_state_27)[name = string("op_1292_cast_fp16")]; + tensor V_layer_cache_11_axes_0 = const()[name = string("V_layer_cache_11_axes_0"), val = tensor([0])]; + tensor V_layer_cache_11_cast_fp16 = squeeze(axes = V_layer_cache_11_axes_0, x = var_1292_cast_fp16)[name = string("V_layer_cache_11_cast_fp16")]; + tensor x_151_axes_0 = const()[name = string("x_151_axes_0"), val = tensor([1])]; + tensor x_151_cast_fp16 = expand_dims(axes = x_151_axes_0, x = K_layer_cache_11_cast_fp16)[name = string("x_151_cast_fp16")]; + tensor var_1301 = const()[name = string("op_1301"), val = tensor([1, 4, 1, 1])]; + tensor x_153_cast_fp16 = tile(reps = var_1301, x = x_151_cast_fp16)[name = string("x_153_cast_fp16")]; + tensor var_1305 = const()[name = string("op_1305"), val = tensor([1, -1, 1546, 64])]; + tensor key_states_23_cast_fp16 = reshape(shape = var_1305, x = x_153_cast_fp16)[name = string("key_states_23_cast_fp16")]; + tensor x_157_axes_0 = const()[name = string("x_157_axes_0"), val = tensor([1])]; + tensor x_157_cast_fp16 = expand_dims(axes = x_157_axes_0, x = V_layer_cache_11_cast_fp16)[name = string("x_157_cast_fp16")]; + tensor var_1308 = const()[name = string("op_1308"), val = tensor([1, 4, 1, 1])]; + tensor x_159_cast_fp16 = tile(reps = var_1308, x = x_157_cast_fp16)[name = string("x_159_cast_fp16")]; + tensor var_1312 = const()[name = string("op_1312"), val = tensor([1, -1, 1546, 64])]; + tensor value_states_23_cast_fp16 = reshape(shape = var_1312, x = x_159_cast_fp16)[name = string("value_states_23_cast_fp16")]; + bool var_1315_transpose_x_1 = const()[name = string("op_1315_transpose_x_1"), val = bool(false)]; + bool var_1315_transpose_y_1 = const()[name = string("op_1315_transpose_y_1"), val = bool(true)]; + tensor var_1315_cast_fp16 = matmul(transpose_x = var_1315_transpose_x_1, transpose_y = var_1315_transpose_y_1, x = rotated_21_cast_fp16, y = key_states_23_cast_fp16)[name = string("op_1315_cast_fp16")]; + fp16 var_1316_to_fp16 = const()[name = string("op_1316_to_fp16"), val = fp16(0x1p-3)]; + tensor attn_weights_21_cast_fp16 = mul(x = var_1315_cast_fp16, y = var_1316_to_fp16)[name = string("attn_weights_21_cast_fp16")]; + tensor x_161_cast_fp16 = add(x = attn_weights_21_cast_fp16, y = causal_mask)[name = string("x_161_cast_fp16")]; + tensor reduce_max_5_axes_0 = const()[name = string("reduce_max_5_axes_0"), val = tensor([-1])]; + bool reduce_max_5_keep_dims_0 = const()[name = string("reduce_max_5_keep_dims_0"), val = bool(true)]; + tensor reduce_max_5_cast_fp16 = reduce_max(axes = reduce_max_5_axes_0, keep_dims = reduce_max_5_keep_dims_0, x = x_161_cast_fp16)[name = string("reduce_max_5_cast_fp16")]; + tensor x_163_cast_fp16 = sub(x = x_161_cast_fp16, y = reduce_max_5_cast_fp16)[name = string("x_163_cast_fp16")]; + tensor exp_x_11_cast_fp16 = exp(x = x_163_cast_fp16)[name = string("exp_x_11_cast_fp16")]; + tensor var_1327_axes_0 = const()[name = string("op_1327_axes_0"), val = tensor([-1])]; + bool var_1327_keep_dims_0 = const()[name = string("op_1327_keep_dims_0"), val = bool(true)]; + tensor var_1327_cast_fp16 = reduce_sum(axes = var_1327_axes_0, keep_dims = var_1327_keep_dims_0, x = exp_x_11_cast_fp16)[name = string("op_1327_cast_fp16")]; + tensor attn_weights_23_cast_fp16 = real_div(x = exp_x_11_cast_fp16, y = var_1327_cast_fp16)[name = string("attn_weights_23_cast_fp16")]; + bool attn_output_31_transpose_x_0 = const()[name = string("attn_output_31_transpose_x_0"), val = bool(false)]; + bool attn_output_31_transpose_y_0 = const()[name = string("attn_output_31_transpose_y_0"), val = bool(false)]; + tensor attn_output_31_cast_fp16 = matmul(transpose_x = attn_output_31_transpose_x_0, transpose_y = attn_output_31_transpose_y_0, x = attn_weights_23_cast_fp16, y = value_states_23_cast_fp16)[name = string("attn_output_31_cast_fp16")]; + tensor var_1330_perm_0 = const()[name = string("op_1330_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1332 = const()[name = string("op_1332"), val = tensor([1, 1, 2048])]; + tensor var_1330_cast_fp16 = transpose(perm = var_1330_perm_0, x = attn_output_31_cast_fp16)[name = string("transpose_10")]; + tensor input_75_cast_fp16 = reshape(shape = var_1332, x = var_1330_cast_fp16)[name = string("input_75_cast_fp16")]; + tensor model_model_layers_5_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271318592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273415808))))[name = string("model_model_layers_5_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_5_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_5_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_75_cast_fp16)[name = string("linear_5_cast_fp16")]; + tensor hidden_states_45_cast_fp16 = add(x = hidden_states_41_cast_fp16, y = linear_5_cast_fp16)[name = string("hidden_states_45_cast_fp16")]; + tensor mean_23_axes_0 = const()[name = string("mean_23_axes_0"), val = tensor([-1])]; + bool mean_23_keep_dims_0 = const()[name = string("mean_23_keep_dims_0"), val = bool(true)]; + tensor mean_23_cast_fp16 = reduce_mean(axes = mean_23_axes_0, keep_dims = mean_23_keep_dims_0, x = hidden_states_45_cast_fp16)[name = string("mean_23_cast_fp16")]; + tensor input_77_cast_fp16 = sub(x = hidden_states_45_cast_fp16, y = mean_23_cast_fp16)[name = string("input_77_cast_fp16")]; + tensor var_1343_axes_0 = const()[name = string("op_1343_axes_0"), val = tensor([-1])]; + tensor model_model_layers_5_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_5_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273424064)))]; + tensor var_1343_cast_fp16 = layer_norm(axes = var_1343_axes_0, epsilon = var_45_to_fp16, gamma = model_model_layers_5_post_attention_layernorm_weight_to_fp16, x = input_77_cast_fp16)[name = string("op_1343_cast_fp16")]; + tensor var_1350 = const()[name = string("op_1350"), val = tensor([0, 2, 1])]; + tensor input_79_axes_0 = const()[name = string("input_79_axes_0"), val = tensor([2])]; + tensor var_1351 = transpose(perm = var_1350, x = var_1343_cast_fp16)[name = string("transpose_9")]; + tensor input_79 = expand_dims(axes = input_79_axes_0, x = var_1351)[name = string("input_79")]; + string input_81_pad_type_0 = const()[name = string("input_81_pad_type_0"), val = string("valid")]; + tensor input_81_strides_0 = const()[name = string("input_81_strides_0"), val = tensor([1, 1])]; + tensor input_81_pad_0 = const()[name = string("input_81_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_81_dilations_0 = const()[name = string("input_81_dilations_0"), val = tensor([1, 1])]; + int32 input_81_groups_0 = const()[name = string("input_81_groups_0"), val = int32(1)]; + tensor input_81 = conv(dilations = input_81_dilations_0, groups = input_81_groups_0, pad = input_81_pad_0, pad_type = input_81_pad_type_0, strides = input_81_strides_0, weight = model_model_layers_5_mlp_gate_proj_weight_palettized, x = input_79)[name = string("input_81")]; + string up_states_11_pad_type_0 = const()[name = string("up_states_11_pad_type_0"), val = string("valid")]; + tensor up_states_11_strides_0 = const()[name = string("up_states_11_strides_0"), val = tensor([1, 1])]; + tensor up_states_11_pad_0 = const()[name = string("up_states_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_11_dilations_0 = const()[name = string("up_states_11_dilations_0"), val = tensor([1, 1])]; + int32 up_states_11_groups_0 = const()[name = string("up_states_11_groups_0"), val = int32(1)]; + tensor up_states_11 = conv(dilations = up_states_11_dilations_0, groups = up_states_11_groups_0, pad = up_states_11_pad_0, pad_type = up_states_11_pad_type_0, strides = up_states_11_strides_0, weight = model_model_layers_5_mlp_up_proj_weight_palettized, x = input_79)[name = string("up_states_11")]; + tensor gate_states_11 = silu(x = input_81)[name = string("gate_states_11")]; + tensor input_83 = mul(x = gate_states_11, y = up_states_11)[name = string("input_83")]; + string hidden_states_47_pad_type_0 = const()[name = string("hidden_states_47_pad_type_0"), val = string("valid")]; + tensor hidden_states_47_strides_0 = const()[name = string("hidden_states_47_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_47_pad_0 = const()[name = string("hidden_states_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_47_dilations_0 = const()[name = string("hidden_states_47_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_47_groups_0 = const()[name = string("hidden_states_47_groups_0"), val = int32(1)]; + tensor hidden_states_47 = conv(dilations = hidden_states_47_dilations_0, groups = hidden_states_47_groups_0, pad = hidden_states_47_pad_0, pad_type = hidden_states_47_pad_type_0, strides = hidden_states_47_strides_0, weight = model_model_layers_5_mlp_down_proj_weight_palettized, x = input_83)[name = string("hidden_states_47")]; + tensor var_1373_axes_0 = const()[name = string("op_1373_axes_0"), val = tensor([2])]; + tensor var_1373 = squeeze(axes = var_1373_axes_0, x = hidden_states_47)[name = string("op_1373")]; + tensor var_1374 = const()[name = string("op_1374"), val = tensor([0, 2, 1])]; + tensor var_1375 = transpose(perm = var_1374, x = var_1373)[name = string("transpose_8")]; + tensor hidden_states_49_cast_fp16 = add(x = hidden_states_45_cast_fp16, y = var_1375)[name = string("hidden_states_49_cast_fp16")]; + tensor mean_25_axes_0 = const()[name = string("mean_25_axes_0"), val = tensor([-1])]; + bool mean_25_keep_dims_0 = const()[name = string("mean_25_keep_dims_0"), val = bool(true)]; + tensor mean_25_cast_fp16 = reduce_mean(axes = mean_25_axes_0, keep_dims = mean_25_keep_dims_0, x = hidden_states_49_cast_fp16)[name = string("mean_25_cast_fp16")]; + tensor input_85_cast_fp16 = sub(x = hidden_states_49_cast_fp16, y = mean_25_cast_fp16)[name = string("input_85_cast_fp16")]; + tensor var_1383_axes_0 = const()[name = string("op_1383_axes_0"), val = tensor([-1])]; + tensor model_model_layers_6_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_6_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273428224)))]; + tensor var_1383_cast_fp16 = layer_norm(axes = var_1383_axes_0, epsilon = var_45_to_fp16, gamma = model_model_layers_6_input_layernorm_weight_to_fp16, x = input_85_cast_fp16)[name = string("op_1383_cast_fp16")]; + tensor var_1386 = const()[name = string("op_1386"), val = tensor([0, 2, 1])]; + tensor var_1388_axes_0 = const()[name = string("op_1388_axes_0"), val = tensor([2])]; + tensor var_1387 = transpose(perm = var_1386, x = var_1383_cast_fp16)[name = string("transpose_7")]; + tensor var_1388 = expand_dims(axes = var_1388_axes_0, x = var_1387)[name = string("op_1388")]; + string var_1395_pad_type_0 = const()[name = string("op_1395_pad_type_0"), val = string("valid")]; + tensor var_1395_strides_0 = const()[name = string("op_1395_strides_0"), val = tensor([1, 1])]; + tensor var_1395_pad_0 = const()[name = string("op_1395_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1395_dilations_0 = const()[name = string("op_1395_dilations_0"), val = tensor([1, 1])]; + int32 var_1395_groups_0 = const()[name = string("op_1395_groups_0"), val = int32(1)]; + tensor var_1395 = conv(dilations = var_1395_dilations_0, groups = var_1395_groups_0, pad = var_1395_pad_0, pad_type = var_1395_pad_type_0, strides = var_1395_strides_0, weight = model_model_layers_6_self_attn_q_proj_weight_palettized, x = var_1388)[name = string("op_1395")]; + tensor var_1396 = const()[name = string("op_1396"), val = tensor([1, 32, 1, 64])]; + tensor var_1397 = reshape(shape = var_1396, x = var_1395)[name = string("op_1397")]; + string var_1404_pad_type_0 = const()[name = string("op_1404_pad_type_0"), val = string("valid")]; + tensor var_1404_strides_0 = const()[name = string("op_1404_strides_0"), val = tensor([1, 1])]; + tensor var_1404_pad_0 = const()[name = string("op_1404_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1404_dilations_0 = const()[name = string("op_1404_dilations_0"), val = tensor([1, 1])]; + int32 var_1404_groups_0 = const()[name = string("op_1404_groups_0"), val = int32(1)]; + tensor var_1404 = conv(dilations = var_1404_dilations_0, groups = var_1404_groups_0, pad = var_1404_pad_0, pad_type = var_1404_pad_type_0, strides = var_1404_strides_0, weight = model_model_layers_6_self_attn_k_proj_weight_palettized, x = var_1388)[name = string("op_1404")]; + tensor var_1405 = const()[name = string("op_1405"), val = tensor([1, 8, 1, 64])]; + tensor var_1406 = reshape(shape = var_1405, x = var_1404)[name = string("op_1406")]; + string var_1413_pad_type_0 = const()[name = string("op_1413_pad_type_0"), val = string("valid")]; + tensor var_1413_strides_0 = const()[name = string("op_1413_strides_0"), val = tensor([1, 1])]; + tensor var_1413_pad_0 = const()[name = string("op_1413_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1413_dilations_0 = const()[name = string("op_1413_dilations_0"), val = tensor([1, 1])]; + int32 var_1413_groups_0 = const()[name = string("op_1413_groups_0"), val = int32(1)]; + tensor var_1413 = conv(dilations = var_1413_dilations_0, groups = var_1413_groups_0, pad = var_1413_pad_0, pad_type = var_1413_pad_type_0, strides = var_1413_strides_0, weight = model_model_layers_6_self_attn_v_proj_weight_palettized, x = var_1388)[name = string("op_1413")]; + tensor var_1414 = const()[name = string("op_1414"), val = tensor([1, 8, 1, 64])]; + tensor var_1415 = reshape(shape = var_1414, x = var_1413)[name = string("op_1415")]; + tensor x1_25_begin_0 = const()[name = string("x1_25_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_25_end_0 = const()[name = string("x1_25_end_0"), val = tensor([1, 32, 1, 32])]; + tensor x1_25_end_mask_0 = const()[name = string("x1_25_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_25 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = var_1397)[name = string("x1_25")]; + tensor x2_25_begin_0 = const()[name = string("x2_25_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_25_end_0 = const()[name = string("x2_25_end_0"), val = tensor([1, 32, 1, 64])]; + tensor x2_25_end_mask_0 = const()[name = string("x2_25_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_25 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = var_1397)[name = string("x2_25")]; + tensor var_1429_cast_fp16 = mul(x = x1_25, y = cos_3_cast_fp16)[name = string("op_1429_cast_fp16")]; + tensor var_1430_cast_fp16 = mul(x = x2_25, y = sin_3_cast_fp16)[name = string("op_1430_cast_fp16")]; + tensor var_1431_cast_fp16 = sub(x = var_1429_cast_fp16, y = var_1430_cast_fp16)[name = string("op_1431_cast_fp16")]; + tensor var_1432_cast_fp16 = mul(x = x2_25, y = cos_3_cast_fp16)[name = string("op_1432_cast_fp16")]; + tensor var_1433_cast_fp16 = mul(x = x1_25, y = sin_3_cast_fp16)[name = string("op_1433_cast_fp16")]; + tensor var_1434_cast_fp16 = add(x = var_1432_cast_fp16, y = var_1433_cast_fp16)[name = string("op_1434_cast_fp16")]; + bool rotated_25_interleave_0 = const()[name = string("rotated_25_interleave_0"), val = bool(false)]; + tensor rotated_25_cast_fp16 = concat(axis = var_50, interleave = rotated_25_interleave_0, values = (var_1431_cast_fp16, var_1434_cast_fp16))[name = string("rotated_25_cast_fp16")]; + tensor x1_27_begin_0 = const()[name = string("x1_27_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_27_end_0 = const()[name = string("x1_27_end_0"), val = tensor([1, 8, 1, 32])]; + tensor x1_27_end_mask_0 = const()[name = string("x1_27_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_27 = slice_by_index(begin = x1_27_begin_0, end = x1_27_end_0, end_mask = x1_27_end_mask_0, x = var_1406)[name = string("x1_27")]; + tensor x2_27_begin_0 = const()[name = string("x2_27_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_27_end_0 = const()[name = string("x2_27_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x2_27_end_mask_0 = const()[name = string("x2_27_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_27 = slice_by_index(begin = x2_27_begin_0, end = x2_27_end_0, end_mask = x2_27_end_mask_0, x = var_1406)[name = string("x2_27")]; + tensor var_1450_cast_fp16 = mul(x = x1_27, y = cos_3_cast_fp16)[name = string("op_1450_cast_fp16")]; + tensor var_1451_cast_fp16 = mul(x = x2_27, y = sin_3_cast_fp16)[name = string("op_1451_cast_fp16")]; + tensor var_1452_cast_fp16 = sub(x = var_1450_cast_fp16, y = var_1451_cast_fp16)[name = string("op_1452_cast_fp16")]; + tensor var_1453_cast_fp16 = mul(x = x2_27, y = cos_3_cast_fp16)[name = string("op_1453_cast_fp16")]; + tensor var_1454_cast_fp16 = mul(x = x1_27, y = sin_3_cast_fp16)[name = string("op_1454_cast_fp16")]; + tensor var_1455_cast_fp16 = add(x = var_1453_cast_fp16, y = var_1454_cast_fp16)[name = string("op_1455_cast_fp16")]; + bool rotated_27_interleave_0 = const()[name = string("rotated_27_interleave_0"), val = bool(false)]; + tensor rotated_27_cast_fp16 = concat(axis = var_50, interleave = rotated_27_interleave_0, values = (var_1452_cast_fp16, var_1455_cast_fp16))[name = string("rotated_27_cast_fp16")]; + tensor expand_dims_72 = const()[name = string("expand_dims_72"), val = tensor([6])]; + tensor expand_dims_73 = const()[name = string("expand_dims_73"), val = tensor([0])]; + tensor expand_dims_75 = const()[name = string("expand_dims_75"), val = tensor([0])]; + tensor expand_dims_76 = const()[name = string("expand_dims_76"), val = tensor([7])]; + int32 concat_50_axis_0 = const()[name = string("concat_50_axis_0"), val = int32(0)]; + bool concat_50_interleave_0 = const()[name = string("concat_50_interleave_0"), val = bool(false)]; + tensor concat_50 = concat(axis = concat_50_axis_0, interleave = concat_50_interleave_0, values = (expand_dims_72, expand_dims_73, current_pos, expand_dims_75))[name = string("concat_50")]; + tensor concat_51_values1_0 = const()[name = string("concat_51_values1_0"), val = tensor([0])]; + tensor concat_51_values3_0 = const()[name = string("concat_51_values3_0"), val = tensor([0])]; + int32 concat_51_axis_0 = const()[name = string("concat_51_axis_0"), val = int32(0)]; + bool concat_51_interleave_0 = const()[name = string("concat_51_interleave_0"), val = bool(false)]; + tensor concat_51 = concat(axis = concat_51_axis_0, interleave = concat_51_interleave_0, values = (expand_dims_76, concat_51_values1_0, var_350, concat_51_values3_0))[name = string("concat_51")]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_50, begin_mask = model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0, end = concat_51, end_mask = model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_13_stride_0, update = rotated_27_cast_fp16, x = coreml_update_state_27)[name = string("model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_12_write_state")]; + tensor coreml_update_state_28 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_12")]; + tensor expand_dims_78 = const()[name = string("expand_dims_78"), val = tensor([22])]; + tensor expand_dims_79 = const()[name = string("expand_dims_79"), val = tensor([0])]; + tensor expand_dims_81 = const()[name = string("expand_dims_81"), val = tensor([0])]; + tensor expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor([23])]; + int32 concat_54_axis_0 = const()[name = string("concat_54_axis_0"), val = int32(0)]; + bool concat_54_interleave_0 = const()[name = string("concat_54_interleave_0"), val = bool(false)]; + tensor concat_54 = concat(axis = concat_54_axis_0, interleave = concat_54_interleave_0, values = (expand_dims_78, expand_dims_79, current_pos, expand_dims_81))[name = string("concat_54")]; + tensor concat_55_values1_0 = const()[name = string("concat_55_values1_0"), val = tensor([0])]; + tensor concat_55_values3_0 = const()[name = string("concat_55_values3_0"), val = tensor([0])]; + int32 concat_55_axis_0 = const()[name = string("concat_55_axis_0"), val = int32(0)]; + bool concat_55_interleave_0 = const()[name = string("concat_55_interleave_0"), val = bool(false)]; + tensor concat_55 = concat(axis = concat_55_axis_0, interleave = concat_55_interleave_0, values = (expand_dims_82, concat_55_values1_0, var_350, concat_55_values3_0))[name = string("concat_55")]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_54, begin_mask = model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0, end = concat_55, end_mask = model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_14_stride_0, update = var_1415, x = coreml_update_state_28)[name = string("model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_13_write_state")]; + tensor coreml_update_state_29 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_13")]; + tensor var_1475_begin_0 = const()[name = string("op_1475_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor var_1475_end_0 = const()[name = string("op_1475_end_0"), val = tensor([7, 8, 1546, 64])]; + tensor var_1475_end_mask_0 = const()[name = string("op_1475_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1475_cast_fp16 = slice_by_index(begin = var_1475_begin_0, end = var_1475_end_0, end_mask = var_1475_end_mask_0, x = coreml_update_state_29)[name = string("op_1475_cast_fp16")]; + tensor K_layer_cache_13_axes_0 = const()[name = string("K_layer_cache_13_axes_0"), val = tensor([0])]; + tensor K_layer_cache_13_cast_fp16 = squeeze(axes = K_layer_cache_13_axes_0, x = var_1475_cast_fp16)[name = string("K_layer_cache_13_cast_fp16")]; + tensor var_1477_begin_0 = const()[name = string("op_1477_begin_0"), val = tensor([22, 0, 0, 0])]; + tensor var_1477_end_0 = const()[name = string("op_1477_end_0"), val = tensor([23, 8, 1546, 64])]; + tensor var_1477_end_mask_0 = const()[name = string("op_1477_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1477_cast_fp16 = slice_by_index(begin = var_1477_begin_0, end = var_1477_end_0, end_mask = var_1477_end_mask_0, x = coreml_update_state_29)[name = string("op_1477_cast_fp16")]; + tensor V_layer_cache_13_axes_0 = const()[name = string("V_layer_cache_13_axes_0"), val = tensor([0])]; + tensor V_layer_cache_13_cast_fp16 = squeeze(axes = V_layer_cache_13_axes_0, x = var_1477_cast_fp16)[name = string("V_layer_cache_13_cast_fp16")]; + tensor x_179_axes_0 = const()[name = string("x_179_axes_0"), val = tensor([1])]; + tensor x_179_cast_fp16 = expand_dims(axes = x_179_axes_0, x = K_layer_cache_13_cast_fp16)[name = string("x_179_cast_fp16")]; + tensor var_1486 = const()[name = string("op_1486"), val = tensor([1, 4, 1, 1])]; + tensor x_181_cast_fp16 = tile(reps = var_1486, x = x_179_cast_fp16)[name = string("x_181_cast_fp16")]; + tensor var_1490 = const()[name = string("op_1490"), val = tensor([1, -1, 1546, 64])]; + tensor key_states_27_cast_fp16 = reshape(shape = var_1490, x = x_181_cast_fp16)[name = string("key_states_27_cast_fp16")]; + tensor x_185_axes_0 = const()[name = string("x_185_axes_0"), val = tensor([1])]; + tensor x_185_cast_fp16 = expand_dims(axes = x_185_axes_0, x = V_layer_cache_13_cast_fp16)[name = string("x_185_cast_fp16")]; + tensor var_1493 = const()[name = string("op_1493"), val = tensor([1, 4, 1, 1])]; + tensor x_187_cast_fp16 = tile(reps = var_1493, x = x_185_cast_fp16)[name = string("x_187_cast_fp16")]; + tensor var_1497 = const()[name = string("op_1497"), val = tensor([1, -1, 1546, 64])]; + tensor value_states_27_cast_fp16 = reshape(shape = var_1497, x = x_187_cast_fp16)[name = string("value_states_27_cast_fp16")]; + bool var_1500_transpose_x_1 = const()[name = string("op_1500_transpose_x_1"), val = bool(false)]; + bool var_1500_transpose_y_1 = const()[name = string("op_1500_transpose_y_1"), val = bool(true)]; + tensor var_1500_cast_fp16 = matmul(transpose_x = var_1500_transpose_x_1, transpose_y = var_1500_transpose_y_1, x = rotated_25_cast_fp16, y = key_states_27_cast_fp16)[name = string("op_1500_cast_fp16")]; + fp16 var_1501_to_fp16 = const()[name = string("op_1501_to_fp16"), val = fp16(0x1p-3)]; + tensor attn_weights_25_cast_fp16 = mul(x = var_1500_cast_fp16, y = var_1501_to_fp16)[name = string("attn_weights_25_cast_fp16")]; + tensor x_189_cast_fp16 = add(x = attn_weights_25_cast_fp16, y = causal_mask)[name = string("x_189_cast_fp16")]; + tensor reduce_max_6_axes_0 = const()[name = string("reduce_max_6_axes_0"), val = tensor([-1])]; + bool reduce_max_6_keep_dims_0 = const()[name = string("reduce_max_6_keep_dims_0"), val = bool(true)]; + tensor reduce_max_6_cast_fp16 = reduce_max(axes = reduce_max_6_axes_0, keep_dims = reduce_max_6_keep_dims_0, x = x_189_cast_fp16)[name = string("reduce_max_6_cast_fp16")]; + tensor x_191_cast_fp16 = sub(x = x_189_cast_fp16, y = reduce_max_6_cast_fp16)[name = string("x_191_cast_fp16")]; + tensor exp_x_13_cast_fp16 = exp(x = x_191_cast_fp16)[name = string("exp_x_13_cast_fp16")]; + tensor var_1512_axes_0 = const()[name = string("op_1512_axes_0"), val = tensor([-1])]; + bool var_1512_keep_dims_0 = const()[name = string("op_1512_keep_dims_0"), val = bool(true)]; + tensor var_1512_cast_fp16 = reduce_sum(axes = var_1512_axes_0, keep_dims = var_1512_keep_dims_0, x = exp_x_13_cast_fp16)[name = string("op_1512_cast_fp16")]; + tensor attn_weights_27_cast_fp16 = real_div(x = exp_x_13_cast_fp16, y = var_1512_cast_fp16)[name = string("attn_weights_27_cast_fp16")]; + bool attn_output_37_transpose_x_0 = const()[name = string("attn_output_37_transpose_x_0"), val = bool(false)]; + bool attn_output_37_transpose_y_0 = const()[name = string("attn_output_37_transpose_y_0"), val = bool(false)]; + tensor attn_output_37_cast_fp16 = matmul(transpose_x = attn_output_37_transpose_x_0, transpose_y = attn_output_37_transpose_y_0, x = attn_weights_27_cast_fp16, y = value_states_27_cast_fp16)[name = string("attn_output_37_cast_fp16")]; + tensor var_1515_perm_0 = const()[name = string("op_1515_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1517 = const()[name = string("op_1517"), val = tensor([1, 1, 2048])]; + tensor var_1515_cast_fp16 = transpose(perm = var_1515_perm_0, x = attn_output_37_cast_fp16)[name = string("transpose_6")]; + tensor input_89_cast_fp16 = reshape(shape = var_1517, x = var_1515_cast_fp16)[name = string("input_89_cast_fp16")]; + tensor model_model_layers_6_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273432384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275529600))))[name = string("model_model_layers_6_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_6_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_6_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_89_cast_fp16)[name = string("linear_6_cast_fp16")]; + tensor hidden_states_53_cast_fp16 = add(x = hidden_states_49_cast_fp16, y = linear_6_cast_fp16)[name = string("hidden_states_53_cast_fp16")]; + tensor mean_27_axes_0 = const()[name = string("mean_27_axes_0"), val = tensor([-1])]; + bool mean_27_keep_dims_0 = const()[name = string("mean_27_keep_dims_0"), val = bool(true)]; + tensor mean_27_cast_fp16 = reduce_mean(axes = mean_27_axes_0, keep_dims = mean_27_keep_dims_0, x = hidden_states_53_cast_fp16)[name = string("mean_27_cast_fp16")]; + tensor input_91_cast_fp16 = sub(x = hidden_states_53_cast_fp16, y = mean_27_cast_fp16)[name = string("input_91_cast_fp16")]; + tensor var_1528_axes_0 = const()[name = string("op_1528_axes_0"), val = tensor([-1])]; + tensor model_model_layers_6_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_6_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275537856)))]; + tensor var_1528_cast_fp16 = layer_norm(axes = var_1528_axes_0, epsilon = var_45_to_fp16, gamma = model_model_layers_6_post_attention_layernorm_weight_to_fp16, x = input_91_cast_fp16)[name = string("op_1528_cast_fp16")]; + tensor var_1535 = const()[name = string("op_1535"), val = tensor([0, 2, 1])]; + tensor input_93_axes_0 = const()[name = string("input_93_axes_0"), val = tensor([2])]; + tensor var_1536 = transpose(perm = var_1535, x = var_1528_cast_fp16)[name = string("transpose_5")]; + tensor input_93 = expand_dims(axes = input_93_axes_0, x = var_1536)[name = string("input_93")]; + string input_95_pad_type_0 = const()[name = string("input_95_pad_type_0"), val = string("valid")]; + tensor input_95_strides_0 = const()[name = string("input_95_strides_0"), val = tensor([1, 1])]; + tensor input_95_pad_0 = const()[name = string("input_95_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_95_dilations_0 = const()[name = string("input_95_dilations_0"), val = tensor([1, 1])]; + int32 input_95_groups_0 = const()[name = string("input_95_groups_0"), val = int32(1)]; + tensor input_95 = conv(dilations = input_95_dilations_0, groups = input_95_groups_0, pad = input_95_pad_0, pad_type = input_95_pad_type_0, strides = input_95_strides_0, weight = model_model_layers_6_mlp_gate_proj_weight_palettized, x = input_93)[name = string("input_95")]; + string up_states_13_pad_type_0 = const()[name = string("up_states_13_pad_type_0"), val = string("valid")]; + tensor up_states_13_strides_0 = const()[name = string("up_states_13_strides_0"), val = tensor([1, 1])]; + tensor up_states_13_pad_0 = const()[name = string("up_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_13_dilations_0 = const()[name = string("up_states_13_dilations_0"), val = tensor([1, 1])]; + int32 up_states_13_groups_0 = const()[name = string("up_states_13_groups_0"), val = int32(1)]; + tensor up_states_13 = conv(dilations = up_states_13_dilations_0, groups = up_states_13_groups_0, pad = up_states_13_pad_0, pad_type = up_states_13_pad_type_0, strides = up_states_13_strides_0, weight = model_model_layers_6_mlp_up_proj_weight_palettized, x = input_93)[name = string("up_states_13")]; + tensor gate_states_13 = silu(x = input_95)[name = string("gate_states_13")]; + tensor input_97 = mul(x = gate_states_13, y = up_states_13)[name = string("input_97")]; + string hidden_states_55_pad_type_0 = const()[name = string("hidden_states_55_pad_type_0"), val = string("valid")]; + tensor hidden_states_55_strides_0 = const()[name = string("hidden_states_55_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_55_pad_0 = const()[name = string("hidden_states_55_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_55_dilations_0 = const()[name = string("hidden_states_55_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_55_groups_0 = const()[name = string("hidden_states_55_groups_0"), val = int32(1)]; + tensor hidden_states_55 = conv(dilations = hidden_states_55_dilations_0, groups = hidden_states_55_groups_0, pad = hidden_states_55_pad_0, pad_type = hidden_states_55_pad_type_0, strides = hidden_states_55_strides_0, weight = model_model_layers_6_mlp_down_proj_weight_palettized, x = input_97)[name = string("hidden_states_55")]; + tensor var_1558_axes_0 = const()[name = string("op_1558_axes_0"), val = tensor([2])]; + tensor var_1558 = squeeze(axes = var_1558_axes_0, x = hidden_states_55)[name = string("op_1558")]; + tensor var_1559 = const()[name = string("op_1559"), val = tensor([0, 2, 1])]; + tensor var_1560 = transpose(perm = var_1559, x = var_1558)[name = string("transpose_4")]; + tensor hidden_states_57_cast_fp16 = add(x = hidden_states_53_cast_fp16, y = var_1560)[name = string("hidden_states_57_cast_fp16")]; + tensor mean_29_axes_0 = const()[name = string("mean_29_axes_0"), val = tensor([-1])]; + bool mean_29_keep_dims_0 = const()[name = string("mean_29_keep_dims_0"), val = bool(true)]; + tensor mean_29_cast_fp16 = reduce_mean(axes = mean_29_axes_0, keep_dims = mean_29_keep_dims_0, x = hidden_states_57_cast_fp16)[name = string("mean_29_cast_fp16")]; + tensor input_99_cast_fp16 = sub(x = hidden_states_57_cast_fp16, y = mean_29_cast_fp16)[name = string("input_99_cast_fp16")]; + tensor var_1568_axes_0 = const()[name = string("op_1568_axes_0"), val = tensor([-1])]; + tensor model_model_layers_7_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_7_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275542016)))]; + tensor var_1568_cast_fp16 = layer_norm(axes = var_1568_axes_0, epsilon = var_45_to_fp16, gamma = model_model_layers_7_input_layernorm_weight_to_fp16, x = input_99_cast_fp16)[name = string("op_1568_cast_fp16")]; + tensor var_1571 = const()[name = string("op_1571"), val = tensor([0, 2, 1])]; + tensor var_1573_axes_0 = const()[name = string("op_1573_axes_0"), val = tensor([2])]; + tensor var_1572 = transpose(perm = var_1571, x = var_1568_cast_fp16)[name = string("transpose_3")]; + tensor var_1573 = expand_dims(axes = var_1573_axes_0, x = var_1572)[name = string("op_1573")]; + string var_1580_pad_type_0 = const()[name = string("op_1580_pad_type_0"), val = string("valid")]; + tensor var_1580_strides_0 = const()[name = string("op_1580_strides_0"), val = tensor([1, 1])]; + tensor var_1580_pad_0 = const()[name = string("op_1580_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1580_dilations_0 = const()[name = string("op_1580_dilations_0"), val = tensor([1, 1])]; + int32 var_1580_groups_0 = const()[name = string("op_1580_groups_0"), val = int32(1)]; + tensor var_1580 = conv(dilations = var_1580_dilations_0, groups = var_1580_groups_0, pad = var_1580_pad_0, pad_type = var_1580_pad_type_0, strides = var_1580_strides_0, weight = model_model_layers_7_self_attn_q_proj_weight_palettized, x = var_1573)[name = string("op_1580")]; + tensor var_1581 = const()[name = string("op_1581"), val = tensor([1, 32, 1, 64])]; + tensor var_1582 = reshape(shape = var_1581, x = var_1580)[name = string("op_1582")]; + string var_1589_pad_type_0 = const()[name = string("op_1589_pad_type_0"), val = string("valid")]; + tensor var_1589_strides_0 = const()[name = string("op_1589_strides_0"), val = tensor([1, 1])]; + tensor var_1589_pad_0 = const()[name = string("op_1589_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1589_dilations_0 = const()[name = string("op_1589_dilations_0"), val = tensor([1, 1])]; + int32 var_1589_groups_0 = const()[name = string("op_1589_groups_0"), val = int32(1)]; + tensor var_1589 = conv(dilations = var_1589_dilations_0, groups = var_1589_groups_0, pad = var_1589_pad_0, pad_type = var_1589_pad_type_0, strides = var_1589_strides_0, weight = model_model_layers_7_self_attn_k_proj_weight_palettized, x = var_1573)[name = string("op_1589")]; + tensor var_1590 = const()[name = string("op_1590"), val = tensor([1, 8, 1, 64])]; + tensor var_1591 = reshape(shape = var_1590, x = var_1589)[name = string("op_1591")]; + string var_1598_pad_type_0 = const()[name = string("op_1598_pad_type_0"), val = string("valid")]; + tensor var_1598_strides_0 = const()[name = string("op_1598_strides_0"), val = tensor([1, 1])]; + tensor var_1598_pad_0 = const()[name = string("op_1598_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1598_dilations_0 = const()[name = string("op_1598_dilations_0"), val = tensor([1, 1])]; + int32 var_1598_groups_0 = const()[name = string("op_1598_groups_0"), val = int32(1)]; + tensor var_1598 = conv(dilations = var_1598_dilations_0, groups = var_1598_groups_0, pad = var_1598_pad_0, pad_type = var_1598_pad_type_0, strides = var_1598_strides_0, weight = model_model_layers_7_self_attn_v_proj_weight_palettized, x = var_1573)[name = string("op_1598")]; + tensor var_1599 = const()[name = string("op_1599"), val = tensor([1, 8, 1, 64])]; + tensor var_1600 = reshape(shape = var_1599, x = var_1598)[name = string("op_1600")]; + tensor x1_29_begin_0 = const()[name = string("x1_29_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_29_end_0 = const()[name = string("x1_29_end_0"), val = tensor([1, 32, 1, 32])]; + tensor x1_29_end_mask_0 = const()[name = string("x1_29_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_29 = slice_by_index(begin = x1_29_begin_0, end = x1_29_end_0, end_mask = x1_29_end_mask_0, x = var_1582)[name = string("x1_29")]; + tensor x2_29_begin_0 = const()[name = string("x2_29_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_29_end_0 = const()[name = string("x2_29_end_0"), val = tensor([1, 32, 1, 64])]; + tensor x2_29_end_mask_0 = const()[name = string("x2_29_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_29 = slice_by_index(begin = x2_29_begin_0, end = x2_29_end_0, end_mask = x2_29_end_mask_0, x = var_1582)[name = string("x2_29")]; + tensor var_1614_cast_fp16 = mul(x = x1_29, y = cos_3_cast_fp16)[name = string("op_1614_cast_fp16")]; + tensor var_1615_cast_fp16 = mul(x = x2_29, y = sin_3_cast_fp16)[name = string("op_1615_cast_fp16")]; + tensor var_1616_cast_fp16 = sub(x = var_1614_cast_fp16, y = var_1615_cast_fp16)[name = string("op_1616_cast_fp16")]; + tensor var_1617_cast_fp16 = mul(x = x2_29, y = cos_3_cast_fp16)[name = string("op_1617_cast_fp16")]; + tensor var_1618_cast_fp16 = mul(x = x1_29, y = sin_3_cast_fp16)[name = string("op_1618_cast_fp16")]; + tensor var_1619_cast_fp16 = add(x = var_1617_cast_fp16, y = var_1618_cast_fp16)[name = string("op_1619_cast_fp16")]; + bool rotated_29_interleave_0 = const()[name = string("rotated_29_interleave_0"), val = bool(false)]; + tensor rotated_29_cast_fp16 = concat(axis = var_50, interleave = rotated_29_interleave_0, values = (var_1616_cast_fp16, var_1619_cast_fp16))[name = string("rotated_29_cast_fp16")]; + tensor x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_end_0 = const()[name = string("x1_end_0"), val = tensor([1, 8, 1, 32])]; + tensor x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = var_1591)[name = string("x1")]; + tensor x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_end_0 = const()[name = string("x2_end_0"), val = tensor([1, 8, 1, 64])]; + tensor x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = var_1591)[name = string("x2")]; + tensor var_1635_cast_fp16 = mul(x = x1, y = cos_3_cast_fp16)[name = string("op_1635_cast_fp16")]; + tensor var_1636_cast_fp16 = mul(x = x2, y = sin_3_cast_fp16)[name = string("op_1636_cast_fp16")]; + tensor var_1637_cast_fp16 = sub(x = var_1635_cast_fp16, y = var_1636_cast_fp16)[name = string("op_1637_cast_fp16")]; + tensor var_1638_cast_fp16 = mul(x = x2, y = cos_3_cast_fp16)[name = string("op_1638_cast_fp16")]; + tensor var_1639_cast_fp16 = mul(x = x1, y = sin_3_cast_fp16)[name = string("op_1639_cast_fp16")]; + tensor var_1640_cast_fp16 = add(x = var_1638_cast_fp16, y = var_1639_cast_fp16)[name = string("op_1640_cast_fp16")]; + bool rotated_interleave_0 = const()[name = string("rotated_interleave_0"), val = bool(false)]; + tensor rotated_cast_fp16 = concat(axis = var_50, interleave = rotated_interleave_0, values = (var_1637_cast_fp16, var_1640_cast_fp16))[name = string("rotated_cast_fp16")]; + tensor expand_dims_84 = const()[name = string("expand_dims_84"), val = tensor([7])]; + tensor expand_dims_85 = const()[name = string("expand_dims_85"), val = tensor([0])]; + tensor expand_dims_87 = const()[name = string("expand_dims_87"), val = tensor([0])]; + tensor expand_dims_88 = const()[name = string("expand_dims_88"), val = tensor([8])]; + int32 concat_58_axis_0 = const()[name = string("concat_58_axis_0"), val = int32(0)]; + bool concat_58_interleave_0 = const()[name = string("concat_58_interleave_0"), val = bool(false)]; + tensor concat_58 = concat(axis = concat_58_axis_0, interleave = concat_58_interleave_0, values = (expand_dims_84, expand_dims_85, current_pos, expand_dims_87))[name = string("concat_58")]; + tensor concat_59_values1_0 = const()[name = string("concat_59_values1_0"), val = tensor([0])]; + tensor concat_59_values3_0 = const()[name = string("concat_59_values3_0"), val = tensor([0])]; + int32 concat_59_axis_0 = const()[name = string("concat_59_axis_0"), val = int32(0)]; + bool concat_59_interleave_0 = const()[name = string("concat_59_interleave_0"), val = bool(false)]; + tensor concat_59 = concat(axis = concat_59_axis_0, interleave = concat_59_interleave_0, values = (expand_dims_88, concat_59_values1_0, var_350, concat_59_values3_0))[name = string("concat_59")]; + tensor model_model_kv_cache_0_internal_tensor_assign_15_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_58, begin_mask = model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0, end = concat_59, end_mask = model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_15_stride_0, update = rotated_cast_fp16, x = coreml_update_state_29)[name = string("model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_14_write_state")]; + tensor coreml_update_state_30 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_14")]; + tensor expand_dims_90 = const()[name = string("expand_dims_90"), val = tensor([23])]; + tensor expand_dims_91 = const()[name = string("expand_dims_91"), val = tensor([0])]; + tensor expand_dims_93 = const()[name = string("expand_dims_93"), val = tensor([0])]; + tensor expand_dims_94 = const()[name = string("expand_dims_94"), val = tensor([24])]; + int32 concat_62_axis_0 = const()[name = string("concat_62_axis_0"), val = int32(0)]; + bool concat_62_interleave_0 = const()[name = string("concat_62_interleave_0"), val = bool(false)]; + tensor concat_62 = concat(axis = concat_62_axis_0, interleave = concat_62_interleave_0, values = (expand_dims_90, expand_dims_91, current_pos, expand_dims_93))[name = string("concat_62")]; + tensor concat_63_values1_0 = const()[name = string("concat_63_values1_0"), val = tensor([0])]; + tensor concat_63_values3_0 = const()[name = string("concat_63_values3_0"), val = tensor([0])]; + int32 concat_63_axis_0 = const()[name = string("concat_63_axis_0"), val = int32(0)]; + bool concat_63_interleave_0 = const()[name = string("concat_63_interleave_0"), val = bool(false)]; + tensor concat_63 = concat(axis = concat_63_axis_0, interleave = concat_63_interleave_0, values = (expand_dims_94, concat_63_values1_0, var_350, concat_63_values3_0))[name = string("concat_63")]; + tensor model_model_kv_cache_0_internal_tensor_assign_16_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_62, begin_mask = model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0, end = concat_63, end_mask = model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_16_stride_0, update = var_1600, x = coreml_update_state_30)[name = string("model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_15_write_state")]; + tensor coreml_update_state_31 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_15")]; + tensor var_1660_begin_0 = const()[name = string("op_1660_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor var_1660_end_0 = const()[name = string("op_1660_end_0"), val = tensor([8, 8, 1546, 64])]; + tensor var_1660_end_mask_0 = const()[name = string("op_1660_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1660_cast_fp16 = slice_by_index(begin = var_1660_begin_0, end = var_1660_end_0, end_mask = var_1660_end_mask_0, x = coreml_update_state_31)[name = string("op_1660_cast_fp16")]; + tensor K_layer_cache_axes_0 = const()[name = string("K_layer_cache_axes_0"), val = tensor([0])]; + tensor K_layer_cache_cast_fp16 = squeeze(axes = K_layer_cache_axes_0, x = var_1660_cast_fp16)[name = string("K_layer_cache_cast_fp16")]; + tensor var_1662_begin_0 = const()[name = string("op_1662_begin_0"), val = tensor([23, 0, 0, 0])]; + tensor var_1662_end_0 = const()[name = string("op_1662_end_0"), val = tensor([24, 8, 1546, 64])]; + tensor var_1662_end_mask_0 = const()[name = string("op_1662_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1662_cast_fp16 = slice_by_index(begin = var_1662_begin_0, end = var_1662_end_0, end_mask = var_1662_end_mask_0, x = coreml_update_state_31)[name = string("op_1662_cast_fp16")]; + tensor V_layer_cache_axes_0 = const()[name = string("V_layer_cache_axes_0"), val = tensor([0])]; + tensor V_layer_cache_cast_fp16 = squeeze(axes = V_layer_cache_axes_0, x = var_1662_cast_fp16)[name = string("V_layer_cache_cast_fp16")]; + tensor x_207_axes_0 = const()[name = string("x_207_axes_0"), val = tensor([1])]; + tensor x_207_cast_fp16 = expand_dims(axes = x_207_axes_0, x = K_layer_cache_cast_fp16)[name = string("x_207_cast_fp16")]; + tensor var_1671 = const()[name = string("op_1671"), val = tensor([1, 4, 1, 1])]; + tensor x_209_cast_fp16 = tile(reps = var_1671, x = x_207_cast_fp16)[name = string("x_209_cast_fp16")]; + tensor var_1675 = const()[name = string("op_1675"), val = tensor([1, -1, 1546, 64])]; + tensor key_states_cast_fp16 = reshape(shape = var_1675, x = x_209_cast_fp16)[name = string("key_states_cast_fp16")]; + tensor x_213_axes_0 = const()[name = string("x_213_axes_0"), val = tensor([1])]; + tensor x_213_cast_fp16 = expand_dims(axes = x_213_axes_0, x = V_layer_cache_cast_fp16)[name = string("x_213_cast_fp16")]; + tensor var_1678 = const()[name = string("op_1678"), val = tensor([1, 4, 1, 1])]; + tensor x_215_cast_fp16 = tile(reps = var_1678, x = x_213_cast_fp16)[name = string("x_215_cast_fp16")]; + tensor var_1682 = const()[name = string("op_1682"), val = tensor([1, -1, 1546, 64])]; + tensor value_states_cast_fp16 = reshape(shape = var_1682, x = x_215_cast_fp16)[name = string("value_states_cast_fp16")]; + bool var_1685_transpose_x_1 = const()[name = string("op_1685_transpose_x_1"), val = bool(false)]; + bool var_1685_transpose_y_1 = const()[name = string("op_1685_transpose_y_1"), val = bool(true)]; + tensor var_1685_cast_fp16 = matmul(transpose_x = var_1685_transpose_x_1, transpose_y = var_1685_transpose_y_1, x = rotated_29_cast_fp16, y = key_states_cast_fp16)[name = string("op_1685_cast_fp16")]; + fp16 var_1686_to_fp16 = const()[name = string("op_1686_to_fp16"), val = fp16(0x1p-3)]; + tensor attn_weights_29_cast_fp16 = mul(x = var_1685_cast_fp16, y = var_1686_to_fp16)[name = string("attn_weights_29_cast_fp16")]; + tensor x_217_cast_fp16 = add(x = attn_weights_29_cast_fp16, y = causal_mask)[name = string("x_217_cast_fp16")]; + tensor reduce_max_7_axes_0 = const()[name = string("reduce_max_7_axes_0"), val = tensor([-1])]; + bool reduce_max_7_keep_dims_0 = const()[name = string("reduce_max_7_keep_dims_0"), val = bool(true)]; + tensor reduce_max_7_cast_fp16 = reduce_max(axes = reduce_max_7_axes_0, keep_dims = reduce_max_7_keep_dims_0, x = x_217_cast_fp16)[name = string("reduce_max_7_cast_fp16")]; + tensor x_219_cast_fp16 = sub(x = x_217_cast_fp16, y = reduce_max_7_cast_fp16)[name = string("x_219_cast_fp16")]; + tensor exp_x_cast_fp16 = exp(x = x_219_cast_fp16)[name = string("exp_x_cast_fp16")]; + tensor var_1697_axes_0 = const()[name = string("op_1697_axes_0"), val = tensor([-1])]; + bool var_1697_keep_dims_0 = const()[name = string("op_1697_keep_dims_0"), val = bool(true)]; + tensor var_1697_cast_fp16 = reduce_sum(axes = var_1697_axes_0, keep_dims = var_1697_keep_dims_0, x = exp_x_cast_fp16)[name = string("op_1697_cast_fp16")]; + tensor attn_weights_cast_fp16 = real_div(x = exp_x_cast_fp16, y = var_1697_cast_fp16)[name = string("attn_weights_cast_fp16")]; + bool attn_output_43_transpose_x_0 = const()[name = string("attn_output_43_transpose_x_0"), val = bool(false)]; + bool attn_output_43_transpose_y_0 = const()[name = string("attn_output_43_transpose_y_0"), val = bool(false)]; + tensor attn_output_43_cast_fp16 = matmul(transpose_x = attn_output_43_transpose_x_0, transpose_y = attn_output_43_transpose_y_0, x = attn_weights_cast_fp16, y = value_states_cast_fp16)[name = string("attn_output_43_cast_fp16")]; + tensor var_1700_perm_0 = const()[name = string("op_1700_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1702 = const()[name = string("op_1702"), val = tensor([1, 1, 2048])]; + tensor var_1700_cast_fp16 = transpose(perm = var_1700_perm_0, x = attn_output_43_cast_fp16)[name = string("transpose_2")]; + tensor input_103_cast_fp16 = reshape(shape = var_1702, x = var_1700_cast_fp16)[name = string("input_103_cast_fp16")]; + tensor model_model_layers_7_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275546176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(277643392))))[name = string("model_model_layers_7_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_7_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_7_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_103_cast_fp16)[name = string("linear_7_cast_fp16")]; + tensor hidden_states_61_cast_fp16 = add(x = hidden_states_57_cast_fp16, y = linear_7_cast_fp16)[name = string("hidden_states_61_cast_fp16")]; + tensor mean_axes_0 = const()[name = string("mean_axes_0"), val = tensor([-1])]; + bool mean_keep_dims_0 = const()[name = string("mean_keep_dims_0"), val = bool(true)]; + tensor mean_cast_fp16 = reduce_mean(axes = mean_axes_0, keep_dims = mean_keep_dims_0, x = hidden_states_61_cast_fp16)[name = string("mean_cast_fp16")]; + tensor input_105_cast_fp16 = sub(x = hidden_states_61_cast_fp16, y = mean_cast_fp16)[name = string("input_105_cast_fp16")]; + tensor var_1713_axes_0 = const()[name = string("op_1713_axes_0"), val = tensor([-1])]; + tensor model_model_layers_7_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_7_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(277651648)))]; + tensor var_1713_cast_fp16 = layer_norm(axes = var_1713_axes_0, epsilon = var_45_to_fp16, gamma = model_model_layers_7_post_attention_layernorm_weight_to_fp16, x = input_105_cast_fp16)[name = string("op_1713_cast_fp16")]; + tensor var_1720 = const()[name = string("op_1720"), val = tensor([0, 2, 1])]; + tensor input_107_axes_0 = const()[name = string("input_107_axes_0"), val = tensor([2])]; + tensor var_1721 = transpose(perm = var_1720, x = var_1713_cast_fp16)[name = string("transpose_1")]; + tensor input_107 = expand_dims(axes = input_107_axes_0, x = var_1721)[name = string("input_107")]; + string input_109_pad_type_0 = const()[name = string("input_109_pad_type_0"), val = string("valid")]; + tensor input_109_strides_0 = const()[name = string("input_109_strides_0"), val = tensor([1, 1])]; + tensor input_109_pad_0 = const()[name = string("input_109_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_109_dilations_0 = const()[name = string("input_109_dilations_0"), val = tensor([1, 1])]; + int32 input_109_groups_0 = const()[name = string("input_109_groups_0"), val = int32(1)]; + tensor input_109 = conv(dilations = input_109_dilations_0, groups = input_109_groups_0, pad = input_109_pad_0, pad_type = input_109_pad_type_0, strides = input_109_strides_0, weight = model_model_layers_7_mlp_gate_proj_weight_palettized, x = input_107)[name = string("input_109")]; + string up_states_pad_type_0 = const()[name = string("up_states_pad_type_0"), val = string("valid")]; + tensor up_states_strides_0 = const()[name = string("up_states_strides_0"), val = tensor([1, 1])]; + tensor up_states_pad_0 = const()[name = string("up_states_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_dilations_0 = const()[name = string("up_states_dilations_0"), val = tensor([1, 1])]; + int32 up_states_groups_0 = const()[name = string("up_states_groups_0"), val = int32(1)]; + tensor up_states = conv(dilations = up_states_dilations_0, groups = up_states_groups_0, pad = up_states_pad_0, pad_type = up_states_pad_type_0, strides = up_states_strides_0, weight = model_model_layers_7_mlp_up_proj_weight_palettized, x = input_107)[name = string("up_states")]; + tensor gate_states = silu(x = input_109)[name = string("gate_states")]; + tensor input = mul(x = gate_states, y = up_states)[name = string("input")]; + string hidden_states_pad_type_0 = const()[name = string("hidden_states_pad_type_0"), val = string("valid")]; + tensor hidden_states_strides_0 = const()[name = string("hidden_states_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_pad_0 = const()[name = string("hidden_states_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_dilations_0 = const()[name = string("hidden_states_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_groups_0 = const()[name = string("hidden_states_groups_0"), val = int32(1)]; + tensor hidden_states_1 = conv(dilations = hidden_states_dilations_0, groups = hidden_states_groups_0, pad = hidden_states_pad_0, pad_type = hidden_states_pad_type_0, strides = hidden_states_strides_0, weight = model_model_layers_7_mlp_down_proj_weight_palettized, x = input)[name = string("hidden_states")]; + tensor var_1743_axes_0 = const()[name = string("op_1743_axes_0"), val = tensor([2])]; + tensor var_1743 = squeeze(axes = var_1743_axes_0, x = hidden_states_1)[name = string("op_1743")]; + tensor var_1744 = const()[name = string("op_1744"), val = tensor([0, 2, 1])]; + tensor var_1745 = transpose(perm = var_1744, x = var_1743)[name = string("transpose_0")]; + tensor output_hidden_states = add(x = hidden_states_61_cast_fp16, y = var_1745)[name = string("op_1746_cast_fp16")]; + tensor position_ids_tmp = identity(x = position_ids)[name = string("position_ids_tmp")]; + } -> (output_hidden_states); + func prefill(tensor causal_mask, tensor current_pos, tensor hidden_states, state> model_model_kv_cache_0, tensor position_ids) { + tensor model_model_layers_0_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2097280))))[name = string("model_model_layers_0_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_0_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2105536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2629888))))[name = string("model_model_layers_0_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_0_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2632000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3156352))))[name = string("model_model_layers_0_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_0_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3158464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11547136))))[name = string("model_model_layers_0_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_0_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11579968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19968640))))[name = string("model_model_layers_0_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_0_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20001472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28390144))))[name = string("model_model_layers_0_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_1_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28398400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30495616))))[name = string("model_model_layers_1_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_1_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30503872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31028224))))[name = string("model_model_layers_1_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_1_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31030336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31554688))))[name = string("model_model_layers_1_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_1_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31556800))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39945472))))[name = string("model_model_layers_1_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_1_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39978304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48366976))))[name = string("model_model_layers_1_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_1_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48399808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56788480))))[name = string("model_model_layers_1_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_2_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56796736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58893952))))[name = string("model_model_layers_2_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_2_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58902208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59426560))))[name = string("model_model_layers_2_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_2_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59428672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59953024))))[name = string("model_model_layers_2_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_2_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59955136))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68343808))))[name = string("model_model_layers_2_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_2_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68376640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76765312))))[name = string("model_model_layers_2_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_2_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76798144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85186816))))[name = string("model_model_layers_2_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_3_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85195072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87292288))))[name = string("model_model_layers_3_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_3_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87300544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87824896))))[name = string("model_model_layers_3_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_3_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87827008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88351360))))[name = string("model_model_layers_3_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_3_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88353472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96742144))))[name = string("model_model_layers_3_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_3_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96774976))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105163648))))[name = string("model_model_layers_3_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_3_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105196480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113585152))))[name = string("model_model_layers_3_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_4_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113593408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115690624))))[name = string("model_model_layers_4_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_4_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115698880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116223232))))[name = string("model_model_layers_4_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_4_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116225344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116749696))))[name = string("model_model_layers_4_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_4_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116751808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125140480))))[name = string("model_model_layers_4_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_4_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125173312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133561984))))[name = string("model_model_layers_4_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_4_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133594816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141983488))))[name = string("model_model_layers_4_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_5_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141991744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144088960))))[name = string("model_model_layers_5_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_5_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144097216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144621568))))[name = string("model_model_layers_5_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_5_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144623680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145148032))))[name = string("model_model_layers_5_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_5_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145150144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153538816))))[name = string("model_model_layers_5_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_5_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153571648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161960320))))[name = string("model_model_layers_5_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_5_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161993152))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170381824))))[name = string("model_model_layers_5_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_6_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170390080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(172487296))))[name = string("model_model_layers_6_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_6_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(172495552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173019904))))[name = string("model_model_layers_6_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_6_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173022016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173546368))))[name = string("model_model_layers_6_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_6_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173548480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(181937152))))[name = string("model_model_layers_6_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_6_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(181969984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190358656))))[name = string("model_model_layers_6_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_6_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190391488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198780160))))[name = string("model_model_layers_6_mlp_down_proj_weight_palettized")]; + tensor model_model_layers_7_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198788416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200885632))))[name = string("model_model_layers_7_self_attn_q_proj_weight_palettized")]; + tensor model_model_layers_7_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200893888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201418240))))[name = string("model_model_layers_7_self_attn_k_proj_weight_palettized")]; + tensor model_model_layers_7_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201420352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201944704))))[name = string("model_model_layers_7_self_attn_v_proj_weight_palettized")]; + tensor model_model_layers_7_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201946816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210335488))))[name = string("model_model_layers_7_mlp_gate_proj_weight_palettized")]; + tensor model_model_layers_7_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210368320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218756992))))[name = string("model_model_layers_7_mlp_up_proj_weight_palettized")]; + tensor model_model_layers_7_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218789824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227178496))))[name = string("model_model_layers_7_mlp_down_proj_weight_palettized")]; + int32 var_45 = const()[name = string("op_45"), val = int32(-1)]; + int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; + tensor greater_equal_0 = greater_equal(x = position_ids, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; + int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(131072)]; + tensor add_0 = add(x = position_ids, y = slice_by_index_0)[name = string("add_0")]; + tensor select_0 = select(a = position_ids, b = add_0, cond = greater_equal_0)[name = string("select_0")]; + int32 var_262_axis_0 = const()[name = string("op_262_axis_0"), val = int32(1)]; + int32 var_262_batch_dims_0 = const()[name = string("op_262_batch_dims_0"), val = int32(0)]; + bool var_262_validate_indices_0 = const()[name = string("op_262_validate_indices_0"), val = bool(false)]; + tensor var_56_to_fp16 = const()[name = string("op_56_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243964032)))]; + tensor var_262_cast_fp16 = gather(axis = var_262_axis_0, batch_dims = var_262_batch_dims_0, indices = select_0, validate_indices = var_262_validate_indices_0, x = var_56_to_fp16)[name = string("op_262_cast_fp16")]; + tensor var_263 = const()[name = string("op_263"), val = tensor([1, 64, 1, 64])]; + tensor cos_1_cast_fp16 = reshape(shape = var_263, x = var_262_cast_fp16)[name = string("cos_1_cast_fp16")]; + int32 var_267_axis_0 = const()[name = string("op_267_axis_0"), val = int32(1)]; + int32 var_267_batch_dims_0 = const()[name = string("op_267_batch_dims_0"), val = int32(0)]; + bool var_267_validate_indices_0 = const()[name = string("op_267_validate_indices_0"), val = bool(false)]; + tensor var_51_to_fp16 = const()[name = string("op_51_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227186752)))]; + tensor var_267_cast_fp16 = gather(axis = var_267_axis_0, batch_dims = var_267_batch_dims_0, indices = select_0, validate_indices = var_267_validate_indices_0, x = var_51_to_fp16)[name = string("op_267_cast_fp16")]; + tensor var_268 = const()[name = string("op_268"), val = tensor([1, 64, 1, 64])]; + tensor sin_1_cast_fp16 = reshape(shape = var_268, x = var_267_cast_fp16)[name = string("sin_1_cast_fp16")]; + tensor mean_1_axes_0 = const()[name = string("mean_1_axes_0"), val = tensor([-1])]; + bool mean_1_keep_dims_0 = const()[name = string("mean_1_keep_dims_0"), val = bool(true)]; + tensor mean_1_cast_fp16 = reduce_mean(axes = mean_1_axes_0, keep_dims = mean_1_keep_dims_0, x = hidden_states)[name = string("mean_1_cast_fp16")]; + tensor input_1_cast_fp16 = sub(x = hidden_states, y = mean_1_cast_fp16)[name = string("input_1_cast_fp16")]; + tensor var_278_axes_0 = const()[name = string("op_278_axes_0"), val = tensor([-1])]; + tensor model_model_layers_0_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_0_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(260741312)))]; + fp16 var_47_to_fp16 = const()[name = string("op_47_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_278_cast_fp16 = layer_norm(axes = var_278_axes_0, epsilon = var_47_to_fp16, gamma = model_model_layers_0_input_layernorm_weight_to_fp16, x = input_1_cast_fp16)[name = string("op_278_cast_fp16")]; + tensor var_282 = const()[name = string("op_282"), val = tensor([0, 2, 1])]; + tensor var_284_axes_0 = const()[name = string("op_284_axes_0"), val = tensor([2])]; + tensor var_283 = transpose(perm = var_282, x = var_278_cast_fp16)[name = string("transpose_57")]; + tensor var_284 = expand_dims(axes = var_284_axes_0, x = var_283)[name = string("op_284")]; + string query_states_1_pad_type_0 = const()[name = string("query_states_1_pad_type_0"), val = string("valid")]; + tensor query_states_1_strides_0 = const()[name = string("query_states_1_strides_0"), val = tensor([1, 1])]; + tensor query_states_1_pad_0 = const()[name = string("query_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_1_dilations_0 = const()[name = string("query_states_1_dilations_0"), val = tensor([1, 1])]; + int32 query_states_1_groups_0 = const()[name = string("query_states_1_groups_0"), val = int32(1)]; + tensor query_states_1 = conv(dilations = query_states_1_dilations_0, groups = query_states_1_groups_0, pad = query_states_1_pad_0, pad_type = query_states_1_pad_type_0, strides = query_states_1_strides_0, weight = model_model_layers_0_self_attn_q_proj_weight_palettized, x = var_284)[name = string("query_states_1")]; + string key_states_1_pad_type_0 = const()[name = string("key_states_1_pad_type_0"), val = string("valid")]; + tensor key_states_1_strides_0 = const()[name = string("key_states_1_strides_0"), val = tensor([1, 1])]; + tensor key_states_1_pad_0 = const()[name = string("key_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_1_dilations_0 = const()[name = string("key_states_1_dilations_0"), val = tensor([1, 1])]; + int32 key_states_1_groups_0 = const()[name = string("key_states_1_groups_0"), val = int32(1)]; + tensor key_states_1 = conv(dilations = key_states_1_dilations_0, groups = key_states_1_groups_0, pad = key_states_1_pad_0, pad_type = key_states_1_pad_type_0, strides = key_states_1_strides_0, weight = model_model_layers_0_self_attn_k_proj_weight_palettized, x = var_284)[name = string("key_states_1")]; + string value_states_1_pad_type_0 = const()[name = string("value_states_1_pad_type_0"), val = string("valid")]; + tensor value_states_1_strides_0 = const()[name = string("value_states_1_strides_0"), val = tensor([1, 1])]; + tensor value_states_1_pad_0 = const()[name = string("value_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_1_dilations_0 = const()[name = string("value_states_1_dilations_0"), val = tensor([1, 1])]; + int32 value_states_1_groups_0 = const()[name = string("value_states_1_groups_0"), val = int32(1)]; + tensor value_states_1 = conv(dilations = value_states_1_dilations_0, groups = value_states_1_groups_0, pad = value_states_1_pad_0, pad_type = value_states_1_pad_type_0, strides = value_states_1_strides_0, weight = model_model_layers_0_self_attn_v_proj_weight_palettized, x = var_284)[name = string("value_states_1")]; + tensor var_304 = const()[name = string("op_304"), val = tensor([1, 32, 64, 64])]; + tensor var_305 = reshape(shape = var_304, x = query_states_1)[name = string("op_305")]; + tensor var_306 = const()[name = string("op_306"), val = tensor([0, 1, 3, 2])]; + tensor var_308 = const()[name = string("op_308"), val = tensor([1, 8, 64, 64])]; + tensor var_309 = reshape(shape = var_308, x = key_states_1)[name = string("op_309")]; + tensor var_310 = const()[name = string("op_310"), val = tensor([0, 1, 3, 2])]; + tensor var_312 = const()[name = string("op_312"), val = tensor([1, 8, 64, 64])]; + tensor var_313 = reshape(shape = var_312, x = value_states_1)[name = string("op_313")]; + tensor var_314 = const()[name = string("op_314"), val = tensor([0, 1, 3, 2])]; + tensor var_316 = const()[name = string("op_316"), val = tensor([0, 2, 1, 3])]; + tensor var_318 = const()[name = string("op_318"), val = tensor([0, 2, 1, 3])]; + tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 32, 64, 32])]; + tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_1 = transpose(perm = var_306, x = var_305)[name = string("transpose_56")]; + tensor x1_1 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = x_1)[name = string("x1_1")]; + tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 32, 64, 64])]; + tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_1 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = x_1)[name = string("x2_1")]; + tensor cos_7_begin_0 = const()[name = string("cos_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor cos_7_end_0 = const()[name = string("cos_7_end_0"), val = tensor([1, 1, 64, 32])]; + tensor cos_7_end_mask_0 = const()[name = string("cos_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor cos_5 = transpose(perm = var_316, x = cos_1_cast_fp16)[name = string("transpose_55")]; + tensor cos_7 = slice_by_index(begin = cos_7_begin_0, end = cos_7_end_0, end_mask = cos_7_end_mask_0, x = cos_5)[name = string("cos_7")]; + tensor sin_7_begin_0 = const()[name = string("sin_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor sin_7_end_0 = const()[name = string("sin_7_end_0"), val = tensor([1, 1, 64, 32])]; + tensor sin_7_end_mask_0 = const()[name = string("sin_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor sin_5 = transpose(perm = var_318, x = sin_1_cast_fp16)[name = string("transpose_54")]; + tensor sin_7 = slice_by_index(begin = sin_7_begin_0, end = sin_7_end_0, end_mask = sin_7_end_mask_0, x = sin_5)[name = string("sin_7")]; + tensor var_332 = mul(x = x1_1, y = cos_7)[name = string("op_332")]; + tensor var_333 = mul(x = x2_1, y = sin_7)[name = string("op_333")]; + tensor var_334 = sub(x = var_332, y = var_333)[name = string("op_334")]; + tensor var_335 = mul(x = x2_1, y = cos_7)[name = string("op_335")]; + tensor var_336 = mul(x = x1_1, y = sin_7)[name = string("op_336")]; + tensor var_337 = add(x = var_335, y = var_336)[name = string("op_337")]; + bool rotated_1_interleave_0 = const()[name = string("rotated_1_interleave_0"), val = bool(false)]; + tensor rotated_1 = concat(axis = var_45, interleave = rotated_1_interleave_0, values = (var_334, var_337))[name = string("rotated_1")]; + tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 8, 64, 32])]; + tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_5 = transpose(perm = var_310, x = var_309)[name = string("transpose_53")]; + tensor x1_3 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = x_5)[name = string("x1_3")]; + tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_3 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = x_5)[name = string("x2_3")]; + tensor var_353 = mul(x = x1_3, y = cos_7)[name = string("op_353")]; + tensor var_354 = mul(x = x2_3, y = sin_7)[name = string("op_354")]; + tensor var_355 = sub(x = var_353, y = var_354)[name = string("op_355")]; + tensor var_356 = mul(x = x2_3, y = cos_7)[name = string("op_356")]; + tensor var_357 = mul(x = x1_3, y = sin_7)[name = string("op_357")]; + tensor var_358 = add(x = var_356, y = var_357)[name = string("op_358")]; + bool rotated_3_interleave_0 = const()[name = string("rotated_3_interleave_0"), val = bool(false)]; + tensor rotated_3 = concat(axis = var_45, interleave = rotated_3_interleave_0, values = (var_355, var_358))[name = string("rotated_3")]; + tensor seq_length_1 = const()[name = string("seq_length_1"), val = tensor([64])]; + tensor var_367 = add(x = current_pos, y = seq_length_1)[name = string("op_367")]; + tensor read_state_0 = read_state(input = model_model_kv_cache_0)[name = string("read_state_0")]; + tensor expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor([0])]; + tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor([0])]; + tensor expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor([0])]; + tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor([1])]; + int32 concat_2_axis_0 = const()[name = string("concat_2_axis_0"), val = int32(0)]; + bool concat_2_interleave_0 = const()[name = string("concat_2_interleave_0"), val = bool(false)]; + tensor concat_2 = concat(axis = concat_2_axis_0, interleave = concat_2_interleave_0, values = (expand_dims_0, expand_dims_1, current_pos, expand_dims_3))[name = string("concat_2")]; + tensor concat_3_values1_0 = const()[name = string("concat_3_values1_0"), val = tensor([0])]; + tensor concat_3_values3_0 = const()[name = string("concat_3_values3_0"), val = tensor([0])]; + int32 concat_3_axis_0 = const()[name = string("concat_3_axis_0"), val = int32(0)]; + bool concat_3_interleave_0 = const()[name = string("concat_3_interleave_0"), val = bool(false)]; + tensor concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (expand_dims_4, concat_3_values1_0, var_367, concat_3_values3_0))[name = string("concat_3")]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_1_stride_0, update = rotated_3, x = read_state_0)[name = string("model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_16_write_state")]; + tensor coreml_update_state_16 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_16")]; + tensor expand_dims_6 = const()[name = string("expand_dims_6"), val = tensor([16])]; + tensor expand_dims_7 = const()[name = string("expand_dims_7"), val = tensor([0])]; + tensor expand_dims_9 = const()[name = string("expand_dims_9"), val = tensor([0])]; + tensor expand_dims_10 = const()[name = string("expand_dims_10"), val = tensor([17])]; + int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)]; + bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)]; + tensor concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (expand_dims_6, expand_dims_7, current_pos, expand_dims_9))[name = string("concat_6")]; + tensor concat_7_values1_0 = const()[name = string("concat_7_values1_0"), val = tensor([0])]; + tensor concat_7_values3_0 = const()[name = string("concat_7_values3_0"), val = tensor([0])]; + int32 concat_7_axis_0 = const()[name = string("concat_7_axis_0"), val = int32(0)]; + bool concat_7_interleave_0 = const()[name = string("concat_7_interleave_0"), val = bool(false)]; + tensor concat_7 = concat(axis = concat_7_axis_0, interleave = concat_7_interleave_0, values = (expand_dims_10, concat_7_values1_0, var_367, concat_7_values3_0))[name = string("concat_7")]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_3 = transpose(perm = var_314, x = var_313)[name = string("transpose_52")]; + tensor model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_6, begin_mask = model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0, end = concat_7, end_mask = model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_2_stride_0, update = value_states_3, x = coreml_update_state_16)[name = string("model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_17_write_state")]; + tensor coreml_update_state_17 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_17")]; + tensor var_381_begin_0 = const()[name = string("op_381_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_381_end_0 = const()[name = string("op_381_end_0"), val = tensor([1, 8, 1546, 64])]; + tensor var_381_end_mask_0 = const()[name = string("op_381_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_381_cast_fp16 = slice_by_index(begin = var_381_begin_0, end = var_381_end_0, end_mask = var_381_end_mask_0, x = coreml_update_state_17)[name = string("op_381_cast_fp16")]; + tensor K_layer_cache_1_axes_0 = const()[name = string("K_layer_cache_1_axes_0"), val = tensor([0])]; + tensor K_layer_cache_1_cast_fp16 = squeeze(axes = K_layer_cache_1_axes_0, x = var_381_cast_fp16)[name = string("K_layer_cache_1_cast_fp16")]; + tensor var_383_begin_0 = const()[name = string("op_383_begin_0"), val = tensor([16, 0, 0, 0])]; + tensor var_383_end_0 = const()[name = string("op_383_end_0"), val = tensor([17, 8, 1546, 64])]; + tensor var_383_end_mask_0 = const()[name = string("op_383_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_383_cast_fp16 = slice_by_index(begin = var_383_begin_0, end = var_383_end_0, end_mask = var_383_end_mask_0, x = coreml_update_state_17)[name = string("op_383_cast_fp16")]; + tensor V_layer_cache_1_axes_0 = const()[name = string("V_layer_cache_1_axes_0"), val = tensor([0])]; + tensor V_layer_cache_1_cast_fp16 = squeeze(axes = V_layer_cache_1_axes_0, x = var_383_cast_fp16)[name = string("V_layer_cache_1_cast_fp16")]; + tensor x_11_axes_0 = const()[name = string("x_11_axes_0"), val = tensor([1])]; + tensor x_11_cast_fp16 = expand_dims(axes = x_11_axes_0, x = K_layer_cache_1_cast_fp16)[name = string("x_11_cast_fp16")]; + tensor var_392 = const()[name = string("op_392"), val = tensor([1, 4, 1, 1])]; + tensor x_13_cast_fp16 = tile(reps = var_392, x = x_11_cast_fp16)[name = string("x_13_cast_fp16")]; + tensor var_396 = const()[name = string("op_396"), val = tensor([1, -1, 1546, 64])]; + tensor var_397_cast_fp16 = reshape(shape = var_396, x = x_13_cast_fp16)[name = string("op_397_cast_fp16")]; + tensor x_17_axes_0 = const()[name = string("x_17_axes_0"), val = tensor([1])]; + tensor x_17_cast_fp16 = expand_dims(axes = x_17_axes_0, x = V_layer_cache_1_cast_fp16)[name = string("x_17_cast_fp16")]; + tensor var_399 = const()[name = string("op_399"), val = tensor([1, 4, 1, 1])]; + tensor x_19_cast_fp16 = tile(reps = var_399, x = x_17_cast_fp16)[name = string("x_19_cast_fp16")]; + bool var_406_transpose_x_0 = const()[name = string("op_406_transpose_x_0"), val = bool(false)]; + bool var_406_transpose_y_0 = const()[name = string("op_406_transpose_y_0"), val = bool(true)]; + tensor var_406_cast_fp16 = matmul(transpose_x = var_406_transpose_x_0, transpose_y = var_406_transpose_y_0, x = rotated_1, y = var_397_cast_fp16)[name = string("op_406_cast_fp16")]; + fp16 var_407_to_fp16 = const()[name = string("op_407_to_fp16"), val = fp16(0x1p-3)]; + tensor attn_weights_1_cast_fp16 = mul(x = var_406_cast_fp16, y = var_407_to_fp16)[name = string("attn_weights_1_cast_fp16")]; + tensor x_21_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = causal_mask)[name = string("x_21_cast_fp16")]; + tensor reduce_max_0_axes_0 = const()[name = string("reduce_max_0_axes_0"), val = tensor([-1])]; + bool reduce_max_0_keep_dims_0 = const()[name = string("reduce_max_0_keep_dims_0"), val = bool(true)]; + tensor reduce_max_0_cast_fp16 = reduce_max(axes = reduce_max_0_axes_0, keep_dims = reduce_max_0_keep_dims_0, x = x_21_cast_fp16)[name = string("reduce_max_0_cast_fp16")]; + tensor x_23_cast_fp16 = sub(x = x_21_cast_fp16, y = reduce_max_0_cast_fp16)[name = string("x_23_cast_fp16")]; + tensor exp_x_1_cast_fp16 = exp(x = x_23_cast_fp16)[name = string("exp_x_1_cast_fp16")]; + tensor var_418_axes_0 = const()[name = string("op_418_axes_0"), val = tensor([-1])]; + bool var_418_keep_dims_0 = const()[name = string("op_418_keep_dims_0"), val = bool(true)]; + tensor var_418_cast_fp16 = reduce_sum(axes = var_418_axes_0, keep_dims = var_418_keep_dims_0, x = exp_x_1_cast_fp16)[name = string("op_418_cast_fp16")]; + tensor var_419_cast_fp16 = real_div(x = exp_x_1_cast_fp16, y = var_418_cast_fp16)[name = string("op_419_cast_fp16")]; + tensor concat_12 = const()[name = string("concat_12"), val = tensor([32, 64, 1546])]; + tensor reshape_0_cast_fp16 = reshape(shape = concat_12, x = var_419_cast_fp16)[name = string("reshape_0_cast_fp16")]; + tensor concat_13 = const()[name = string("concat_13"), val = tensor([32, 1546, 64])]; + tensor reshape_1_cast_fp16 = reshape(shape = concat_13, x = x_19_cast_fp16)[name = string("reshape_1_cast_fp16")]; + bool matmul_0_transpose_x_0 = const()[name = string("matmul_0_transpose_x_0"), val = bool(false)]; + bool matmul_0_transpose_y_0 = const()[name = string("matmul_0_transpose_y_0"), val = bool(false)]; + tensor matmul_0_cast_fp16 = matmul(transpose_x = matmul_0_transpose_x_0, transpose_y = matmul_0_transpose_y_0, x = reshape_0_cast_fp16, y = reshape_1_cast_fp16)[name = string("matmul_0_cast_fp16")]; + tensor concat_17 = const()[name = string("concat_17"), val = tensor([1, 32, 64, 64])]; + tensor reshape_2_cast_fp16 = reshape(shape = concat_17, x = matmul_0_cast_fp16)[name = string("reshape_2_cast_fp16")]; + tensor var_422_perm_0 = const()[name = string("op_422_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_424 = const()[name = string("op_424"), val = tensor([1, 64, 2048])]; + tensor var_422_cast_fp16 = transpose(perm = var_422_perm_0, x = reshape_2_cast_fp16)[name = string("transpose_51")]; + tensor input_5_cast_fp16 = reshape(shape = var_424, x = var_422_cast_fp16)[name = string("input_5_cast_fp16")]; + tensor model_model_layers_0_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(260745472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262842688))))[name = string("model_model_layers_0_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262850944)))]; + tensor linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_0_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_5_cast_fp16)[name = string("linear_0_cast_fp16")]; + tensor hidden_states_5_cast_fp16 = add(x = hidden_states, y = linear_0_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; + tensor mean_3_axes_0 = const()[name = string("mean_3_axes_0"), val = tensor([-1])]; + bool mean_3_keep_dims_0 = const()[name = string("mean_3_keep_dims_0"), val = bool(true)]; + tensor mean_3_cast_fp16 = reduce_mean(axes = mean_3_axes_0, keep_dims = mean_3_keep_dims_0, x = hidden_states_5_cast_fp16)[name = string("mean_3_cast_fp16")]; + tensor input_7_cast_fp16 = sub(x = hidden_states_5_cast_fp16, y = mean_3_cast_fp16)[name = string("input_7_cast_fp16")]; + tensor var_435_axes_0 = const()[name = string("op_435_axes_0"), val = tensor([-1])]; + tensor model_model_layers_0_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_0_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262855104)))]; + tensor var_435_cast_fp16 = layer_norm(axes = var_435_axes_0, epsilon = var_47_to_fp16, gamma = model_model_layers_0_post_attention_layernorm_weight_to_fp16, x = input_7_cast_fp16)[name = string("op_435_cast_fp16")]; + tensor var_442 = const()[name = string("op_442"), val = tensor([0, 2, 1])]; + tensor input_9_axes_0 = const()[name = string("input_9_axes_0"), val = tensor([2])]; + tensor var_443 = transpose(perm = var_442, x = var_435_cast_fp16)[name = string("transpose_50")]; + tensor input_9 = expand_dims(axes = input_9_axes_0, x = var_443)[name = string("input_9")]; + string input_11_pad_type_0 = const()[name = string("input_11_pad_type_0"), val = string("valid")]; + tensor input_11_strides_0 = const()[name = string("input_11_strides_0"), val = tensor([1, 1])]; + tensor input_11_pad_0 = const()[name = string("input_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_11_dilations_0 = const()[name = string("input_11_dilations_0"), val = tensor([1, 1])]; + int32 input_11_groups_0 = const()[name = string("input_11_groups_0"), val = int32(1)]; + tensor input_11 = conv(dilations = input_11_dilations_0, groups = input_11_groups_0, pad = input_11_pad_0, pad_type = input_11_pad_type_0, strides = input_11_strides_0, weight = model_model_layers_0_mlp_gate_proj_weight_palettized, x = input_9)[name = string("input_11")]; + string up_states_1_pad_type_0 = const()[name = string("up_states_1_pad_type_0"), val = string("valid")]; + tensor up_states_1_strides_0 = const()[name = string("up_states_1_strides_0"), val = tensor([1, 1])]; + tensor up_states_1_pad_0 = const()[name = string("up_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_1_dilations_0 = const()[name = string("up_states_1_dilations_0"), val = tensor([1, 1])]; + int32 up_states_1_groups_0 = const()[name = string("up_states_1_groups_0"), val = int32(1)]; + tensor up_states_1 = conv(dilations = up_states_1_dilations_0, groups = up_states_1_groups_0, pad = up_states_1_pad_0, pad_type = up_states_1_pad_type_0, strides = up_states_1_strides_0, weight = model_model_layers_0_mlp_up_proj_weight_palettized, x = input_9)[name = string("up_states_1")]; + tensor gate_states_1 = silu(x = input_11)[name = string("gate_states_1")]; + tensor input_13 = mul(x = gate_states_1, y = up_states_1)[name = string("input_13")]; + string hidden_states_7_pad_type_0 = const()[name = string("hidden_states_7_pad_type_0"), val = string("valid")]; + tensor hidden_states_7_strides_0 = const()[name = string("hidden_states_7_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_7_pad_0 = const()[name = string("hidden_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_7_dilations_0 = const()[name = string("hidden_states_7_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_7_groups_0 = const()[name = string("hidden_states_7_groups_0"), val = int32(1)]; + tensor hidden_states_7 = conv(dilations = hidden_states_7_dilations_0, groups = hidden_states_7_groups_0, pad = hidden_states_7_pad_0, pad_type = hidden_states_7_pad_type_0, strides = hidden_states_7_strides_0, weight = model_model_layers_0_mlp_down_proj_weight_palettized, x = input_13)[name = string("hidden_states_7")]; + tensor var_465_axes_0 = const()[name = string("op_465_axes_0"), val = tensor([2])]; + tensor var_465 = squeeze(axes = var_465_axes_0, x = hidden_states_7)[name = string("op_465")]; + tensor var_466 = const()[name = string("op_466"), val = tensor([0, 2, 1])]; + tensor var_467 = transpose(perm = var_466, x = var_465)[name = string("transpose_49")]; + tensor hidden_states_9_cast_fp16 = add(x = hidden_states_5_cast_fp16, y = var_467)[name = string("hidden_states_9_cast_fp16")]; + tensor mean_5_axes_0 = const()[name = string("mean_5_axes_0"), val = tensor([-1])]; + bool mean_5_keep_dims_0 = const()[name = string("mean_5_keep_dims_0"), val = bool(true)]; + tensor mean_5_cast_fp16 = reduce_mean(axes = mean_5_axes_0, keep_dims = mean_5_keep_dims_0, x = hidden_states_9_cast_fp16)[name = string("mean_5_cast_fp16")]; + tensor input_15_cast_fp16 = sub(x = hidden_states_9_cast_fp16, y = mean_5_cast_fp16)[name = string("input_15_cast_fp16")]; + tensor var_475_axes_0 = const()[name = string("op_475_axes_0"), val = tensor([-1])]; + tensor model_model_layers_1_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_1_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262859264)))]; + tensor var_475_cast_fp16 = layer_norm(axes = var_475_axes_0, epsilon = var_47_to_fp16, gamma = model_model_layers_1_input_layernorm_weight_to_fp16, x = input_15_cast_fp16)[name = string("op_475_cast_fp16")]; + tensor var_479 = const()[name = string("op_479"), val = tensor([0, 2, 1])]; + tensor var_481_axes_0 = const()[name = string("op_481_axes_0"), val = tensor([2])]; + tensor var_480 = transpose(perm = var_479, x = var_475_cast_fp16)[name = string("transpose_48")]; + tensor var_481 = expand_dims(axes = var_481_axes_0, x = var_480)[name = string("op_481")]; + string query_states_5_pad_type_0 = const()[name = string("query_states_5_pad_type_0"), val = string("valid")]; + tensor query_states_5_strides_0 = const()[name = string("query_states_5_strides_0"), val = tensor([1, 1])]; + tensor query_states_5_pad_0 = const()[name = string("query_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_5_dilations_0 = const()[name = string("query_states_5_dilations_0"), val = tensor([1, 1])]; + int32 query_states_5_groups_0 = const()[name = string("query_states_5_groups_0"), val = int32(1)]; + tensor query_states_5 = conv(dilations = query_states_5_dilations_0, groups = query_states_5_groups_0, pad = query_states_5_pad_0, pad_type = query_states_5_pad_type_0, strides = query_states_5_strides_0, weight = model_model_layers_1_self_attn_q_proj_weight_palettized, x = var_481)[name = string("query_states_5")]; + string key_states_7_pad_type_0 = const()[name = string("key_states_7_pad_type_0"), val = string("valid")]; + tensor key_states_7_strides_0 = const()[name = string("key_states_7_strides_0"), val = tensor([1, 1])]; + tensor key_states_7_pad_0 = const()[name = string("key_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_7_dilations_0 = const()[name = string("key_states_7_dilations_0"), val = tensor([1, 1])]; + int32 key_states_7_groups_0 = const()[name = string("key_states_7_groups_0"), val = int32(1)]; + tensor key_states_7 = conv(dilations = key_states_7_dilations_0, groups = key_states_7_groups_0, pad = key_states_7_pad_0, pad_type = key_states_7_pad_type_0, strides = key_states_7_strides_0, weight = model_model_layers_1_self_attn_k_proj_weight_palettized, x = var_481)[name = string("key_states_7")]; + string value_states_7_pad_type_0 = const()[name = string("value_states_7_pad_type_0"), val = string("valid")]; + tensor value_states_7_strides_0 = const()[name = string("value_states_7_strides_0"), val = tensor([1, 1])]; + tensor value_states_7_pad_0 = const()[name = string("value_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_7_dilations_0 = const()[name = string("value_states_7_dilations_0"), val = tensor([1, 1])]; + int32 value_states_7_groups_0 = const()[name = string("value_states_7_groups_0"), val = int32(1)]; + tensor value_states_7 = conv(dilations = value_states_7_dilations_0, groups = value_states_7_groups_0, pad = value_states_7_pad_0, pad_type = value_states_7_pad_type_0, strides = value_states_7_strides_0, weight = model_model_layers_1_self_attn_v_proj_weight_palettized, x = var_481)[name = string("value_states_7")]; + tensor var_501 = const()[name = string("op_501"), val = tensor([1, 32, 64, 64])]; + tensor var_502 = reshape(shape = var_501, x = query_states_5)[name = string("op_502")]; + tensor var_503 = const()[name = string("op_503"), val = tensor([0, 1, 3, 2])]; + tensor var_505 = const()[name = string("op_505"), val = tensor([1, 8, 64, 64])]; + tensor var_506 = reshape(shape = var_505, x = key_states_7)[name = string("op_506")]; + tensor var_507 = const()[name = string("op_507"), val = tensor([0, 1, 3, 2])]; + tensor var_509 = const()[name = string("op_509"), val = tensor([1, 8, 64, 64])]; + tensor var_510 = reshape(shape = var_509, x = value_states_7)[name = string("op_510")]; + tensor var_511 = const()[name = string("op_511"), val = tensor([0, 1, 3, 2])]; + tensor x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor([1, 32, 64, 32])]; + tensor x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_29 = transpose(perm = var_503, x = var_502)[name = string("transpose_47")]; + tensor x1_5 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = x_29)[name = string("x1_5")]; + tensor x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor([1, 32, 64, 64])]; + tensor x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_5 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = x_29)[name = string("x2_5")]; + tensor var_529 = mul(x = x1_5, y = cos_7)[name = string("op_529")]; + tensor var_530 = mul(x = x2_5, y = sin_7)[name = string("op_530")]; + tensor var_531 = sub(x = var_529, y = var_530)[name = string("op_531")]; + tensor var_532 = mul(x = x2_5, y = cos_7)[name = string("op_532")]; + tensor var_533 = mul(x = x1_5, y = sin_7)[name = string("op_533")]; + tensor var_534 = add(x = var_532, y = var_533)[name = string("op_534")]; + bool rotated_5_interleave_0 = const()[name = string("rotated_5_interleave_0"), val = bool(false)]; + tensor rotated_5 = concat(axis = var_45, interleave = rotated_5_interleave_0, values = (var_531, var_534))[name = string("rotated_5")]; + tensor x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor([1, 8, 64, 32])]; + tensor x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_33 = transpose(perm = var_507, x = var_506)[name = string("transpose_46")]; + tensor x1_7 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = x_33)[name = string("x1_7")]; + tensor x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_7 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = x_33)[name = string("x2_7")]; + tensor var_550 = mul(x = x1_7, y = cos_7)[name = string("op_550")]; + tensor var_551 = mul(x = x2_7, y = sin_7)[name = string("op_551")]; + tensor var_552 = sub(x = var_550, y = var_551)[name = string("op_552")]; + tensor var_553 = mul(x = x2_7, y = cos_7)[name = string("op_553")]; + tensor var_554 = mul(x = x1_7, y = sin_7)[name = string("op_554")]; + tensor var_555 = add(x = var_553, y = var_554)[name = string("op_555")]; + bool rotated_7_interleave_0 = const()[name = string("rotated_7_interleave_0"), val = bool(false)]; + tensor rotated_7 = concat(axis = var_45, interleave = rotated_7_interleave_0, values = (var_552, var_555))[name = string("rotated_7")]; + tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([1])]; + tensor expand_dims_13 = const()[name = string("expand_dims_13"), val = tensor([0])]; + tensor expand_dims_15 = const()[name = string("expand_dims_15"), val = tensor([0])]; + tensor expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor([2])]; + int32 concat_20_axis_0 = const()[name = string("concat_20_axis_0"), val = int32(0)]; + bool concat_20_interleave_0 = const()[name = string("concat_20_interleave_0"), val = bool(false)]; + tensor concat_20 = concat(axis = concat_20_axis_0, interleave = concat_20_interleave_0, values = (expand_dims_12, expand_dims_13, current_pos, expand_dims_15))[name = string("concat_20")]; + tensor concat_21_values1_0 = const()[name = string("concat_21_values1_0"), val = tensor([0])]; + tensor concat_21_values3_0 = const()[name = string("concat_21_values3_0"), val = tensor([0])]; + int32 concat_21_axis_0 = const()[name = string("concat_21_axis_0"), val = int32(0)]; + bool concat_21_interleave_0 = const()[name = string("concat_21_interleave_0"), val = bool(false)]; + tensor concat_21 = concat(axis = concat_21_axis_0, interleave = concat_21_interleave_0, values = (expand_dims_16, concat_21_values1_0, var_367, concat_21_values3_0))[name = string("concat_21")]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_20, begin_mask = model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0, end = concat_21, end_mask = model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_3_stride_0, update = rotated_7, x = coreml_update_state_17)[name = string("model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_18_write_state")]; + tensor coreml_update_state_18 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_18")]; + tensor expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor([17])]; + tensor expand_dims_19 = const()[name = string("expand_dims_19"), val = tensor([0])]; + tensor expand_dims_21 = const()[name = string("expand_dims_21"), val = tensor([0])]; + tensor expand_dims_22 = const()[name = string("expand_dims_22"), val = tensor([18])]; + int32 concat_24_axis_0 = const()[name = string("concat_24_axis_0"), val = int32(0)]; + bool concat_24_interleave_0 = const()[name = string("concat_24_interleave_0"), val = bool(false)]; + tensor concat_24 = concat(axis = concat_24_axis_0, interleave = concat_24_interleave_0, values = (expand_dims_18, expand_dims_19, current_pos, expand_dims_21))[name = string("concat_24")]; + tensor concat_25_values1_0 = const()[name = string("concat_25_values1_0"), val = tensor([0])]; + tensor concat_25_values3_0 = const()[name = string("concat_25_values3_0"), val = tensor([0])]; + int32 concat_25_axis_0 = const()[name = string("concat_25_axis_0"), val = int32(0)]; + bool concat_25_interleave_0 = const()[name = string("concat_25_interleave_0"), val = bool(false)]; + tensor concat_25 = concat(axis = concat_25_axis_0, interleave = concat_25_interleave_0, values = (expand_dims_22, concat_25_values1_0, var_367, concat_25_values3_0))[name = string("concat_25")]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_9 = transpose(perm = var_511, x = var_510)[name = string("transpose_45")]; + tensor model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_24, begin_mask = model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0, end = concat_25, end_mask = model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_4_stride_0, update = value_states_9, x = coreml_update_state_18)[name = string("model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_19_write_state")]; + tensor coreml_update_state_19 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_19")]; + tensor var_578_begin_0 = const()[name = string("op_578_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor var_578_end_0 = const()[name = string("op_578_end_0"), val = tensor([2, 8, 1546, 64])]; + tensor var_578_end_mask_0 = const()[name = string("op_578_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_578_cast_fp16 = slice_by_index(begin = var_578_begin_0, end = var_578_end_0, end_mask = var_578_end_mask_0, x = coreml_update_state_19)[name = string("op_578_cast_fp16")]; + tensor K_layer_cache_3_axes_0 = const()[name = string("K_layer_cache_3_axes_0"), val = tensor([0])]; + tensor K_layer_cache_3_cast_fp16 = squeeze(axes = K_layer_cache_3_axes_0, x = var_578_cast_fp16)[name = string("K_layer_cache_3_cast_fp16")]; + tensor var_580_begin_0 = const()[name = string("op_580_begin_0"), val = tensor([17, 0, 0, 0])]; + tensor var_580_end_0 = const()[name = string("op_580_end_0"), val = tensor([18, 8, 1546, 64])]; + tensor var_580_end_mask_0 = const()[name = string("op_580_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_580_cast_fp16 = slice_by_index(begin = var_580_begin_0, end = var_580_end_0, end_mask = var_580_end_mask_0, x = coreml_update_state_19)[name = string("op_580_cast_fp16")]; + tensor V_layer_cache_3_axes_0 = const()[name = string("V_layer_cache_3_axes_0"), val = tensor([0])]; + tensor V_layer_cache_3_cast_fp16 = squeeze(axes = V_layer_cache_3_axes_0, x = var_580_cast_fp16)[name = string("V_layer_cache_3_cast_fp16")]; + tensor x_39_axes_0 = const()[name = string("x_39_axes_0"), val = tensor([1])]; + tensor x_39_cast_fp16 = expand_dims(axes = x_39_axes_0, x = K_layer_cache_3_cast_fp16)[name = string("x_39_cast_fp16")]; + tensor var_589 = const()[name = string("op_589"), val = tensor([1, 4, 1, 1])]; + tensor x_41_cast_fp16 = tile(reps = var_589, x = x_39_cast_fp16)[name = string("x_41_cast_fp16")]; + tensor var_593 = const()[name = string("op_593"), val = tensor([1, -1, 1546, 64])]; + tensor var_594_cast_fp16 = reshape(shape = var_593, x = x_41_cast_fp16)[name = string("op_594_cast_fp16")]; + tensor x_45_axes_0 = const()[name = string("x_45_axes_0"), val = tensor([1])]; + tensor x_45_cast_fp16 = expand_dims(axes = x_45_axes_0, x = V_layer_cache_3_cast_fp16)[name = string("x_45_cast_fp16")]; + tensor var_596 = const()[name = string("op_596"), val = tensor([1, 4, 1, 1])]; + tensor x_47_cast_fp16 = tile(reps = var_596, x = x_45_cast_fp16)[name = string("x_47_cast_fp16")]; + bool var_603_transpose_x_0 = const()[name = string("op_603_transpose_x_0"), val = bool(false)]; + bool var_603_transpose_y_0 = const()[name = string("op_603_transpose_y_0"), val = bool(true)]; + tensor var_603_cast_fp16 = matmul(transpose_x = var_603_transpose_x_0, transpose_y = var_603_transpose_y_0, x = rotated_5, y = var_594_cast_fp16)[name = string("op_603_cast_fp16")]; + fp16 var_604_to_fp16 = const()[name = string("op_604_to_fp16"), val = fp16(0x1p-3)]; + tensor attn_weights_3_cast_fp16 = mul(x = var_603_cast_fp16, y = var_604_to_fp16)[name = string("attn_weights_3_cast_fp16")]; + tensor x_49_cast_fp16 = add(x = attn_weights_3_cast_fp16, y = causal_mask)[name = string("x_49_cast_fp16")]; + tensor reduce_max_1_axes_0 = const()[name = string("reduce_max_1_axes_0"), val = tensor([-1])]; + bool reduce_max_1_keep_dims_0 = const()[name = string("reduce_max_1_keep_dims_0"), val = bool(true)]; + tensor reduce_max_1_cast_fp16 = reduce_max(axes = reduce_max_1_axes_0, keep_dims = reduce_max_1_keep_dims_0, x = x_49_cast_fp16)[name = string("reduce_max_1_cast_fp16")]; + tensor x_51_cast_fp16 = sub(x = x_49_cast_fp16, y = reduce_max_1_cast_fp16)[name = string("x_51_cast_fp16")]; + tensor exp_x_3_cast_fp16 = exp(x = x_51_cast_fp16)[name = string("exp_x_3_cast_fp16")]; + tensor var_615_axes_0 = const()[name = string("op_615_axes_0"), val = tensor([-1])]; + bool var_615_keep_dims_0 = const()[name = string("op_615_keep_dims_0"), val = bool(true)]; + tensor var_615_cast_fp16 = reduce_sum(axes = var_615_axes_0, keep_dims = var_615_keep_dims_0, x = exp_x_3_cast_fp16)[name = string("op_615_cast_fp16")]; + tensor var_616_cast_fp16 = real_div(x = exp_x_3_cast_fp16, y = var_615_cast_fp16)[name = string("op_616_cast_fp16")]; + tensor concat_30 = const()[name = string("concat_30"), val = tensor([32, 64, 1546])]; + tensor reshape_3_cast_fp16 = reshape(shape = concat_30, x = var_616_cast_fp16)[name = string("reshape_3_cast_fp16")]; + tensor concat_31 = const()[name = string("concat_31"), val = tensor([32, 1546, 64])]; + tensor reshape_4_cast_fp16 = reshape(shape = concat_31, x = x_47_cast_fp16)[name = string("reshape_4_cast_fp16")]; + bool matmul_1_transpose_x_0 = const()[name = string("matmul_1_transpose_x_0"), val = bool(false)]; + bool matmul_1_transpose_y_0 = const()[name = string("matmul_1_transpose_y_0"), val = bool(false)]; + tensor matmul_1_cast_fp16 = matmul(transpose_x = matmul_1_transpose_x_0, transpose_y = matmul_1_transpose_y_0, x = reshape_3_cast_fp16, y = reshape_4_cast_fp16)[name = string("matmul_1_cast_fp16")]; + tensor concat_35 = const()[name = string("concat_35"), val = tensor([1, 32, 64, 64])]; + tensor reshape_5_cast_fp16 = reshape(shape = concat_35, x = matmul_1_cast_fp16)[name = string("reshape_5_cast_fp16")]; + tensor var_619_perm_0 = const()[name = string("op_619_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_621 = const()[name = string("op_621"), val = tensor([1, 64, 2048])]; + tensor var_619_cast_fp16 = transpose(perm = var_619_perm_0, x = reshape_5_cast_fp16)[name = string("transpose_44")]; + tensor input_19_cast_fp16 = reshape(shape = var_621, x = var_619_cast_fp16)[name = string("input_19_cast_fp16")]; + tensor model_model_layers_1_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262863424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(264960640))))[name = string("model_model_layers_1_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_1_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_1_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_19_cast_fp16)[name = string("linear_1_cast_fp16")]; + tensor hidden_states_13_cast_fp16 = add(x = hidden_states_9_cast_fp16, y = linear_1_cast_fp16)[name = string("hidden_states_13_cast_fp16")]; + tensor mean_7_axes_0 = const()[name = string("mean_7_axes_0"), val = tensor([-1])]; + bool mean_7_keep_dims_0 = const()[name = string("mean_7_keep_dims_0"), val = bool(true)]; + tensor mean_7_cast_fp16 = reduce_mean(axes = mean_7_axes_0, keep_dims = mean_7_keep_dims_0, x = hidden_states_13_cast_fp16)[name = string("mean_7_cast_fp16")]; + tensor input_21_cast_fp16 = sub(x = hidden_states_13_cast_fp16, y = mean_7_cast_fp16)[name = string("input_21_cast_fp16")]; + tensor var_632_axes_0 = const()[name = string("op_632_axes_0"), val = tensor([-1])]; + tensor model_model_layers_1_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_1_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(264968896)))]; + tensor var_632_cast_fp16 = layer_norm(axes = var_632_axes_0, epsilon = var_47_to_fp16, gamma = model_model_layers_1_post_attention_layernorm_weight_to_fp16, x = input_21_cast_fp16)[name = string("op_632_cast_fp16")]; + tensor var_639 = const()[name = string("op_639"), val = tensor([0, 2, 1])]; + tensor input_23_axes_0 = const()[name = string("input_23_axes_0"), val = tensor([2])]; + tensor var_640 = transpose(perm = var_639, x = var_632_cast_fp16)[name = string("transpose_43")]; + tensor input_23 = expand_dims(axes = input_23_axes_0, x = var_640)[name = string("input_23")]; + string input_25_pad_type_0 = const()[name = string("input_25_pad_type_0"), val = string("valid")]; + tensor input_25_strides_0 = const()[name = string("input_25_strides_0"), val = tensor([1, 1])]; + tensor input_25_pad_0 = const()[name = string("input_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_25_dilations_0 = const()[name = string("input_25_dilations_0"), val = tensor([1, 1])]; + int32 input_25_groups_0 = const()[name = string("input_25_groups_0"), val = int32(1)]; + tensor input_25 = conv(dilations = input_25_dilations_0, groups = input_25_groups_0, pad = input_25_pad_0, pad_type = input_25_pad_type_0, strides = input_25_strides_0, weight = model_model_layers_1_mlp_gate_proj_weight_palettized, x = input_23)[name = string("input_25")]; + string up_states_3_pad_type_0 = const()[name = string("up_states_3_pad_type_0"), val = string("valid")]; + tensor up_states_3_strides_0 = const()[name = string("up_states_3_strides_0"), val = tensor([1, 1])]; + tensor up_states_3_pad_0 = const()[name = string("up_states_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_3_dilations_0 = const()[name = string("up_states_3_dilations_0"), val = tensor([1, 1])]; + int32 up_states_3_groups_0 = const()[name = string("up_states_3_groups_0"), val = int32(1)]; + tensor up_states_3 = conv(dilations = up_states_3_dilations_0, groups = up_states_3_groups_0, pad = up_states_3_pad_0, pad_type = up_states_3_pad_type_0, strides = up_states_3_strides_0, weight = model_model_layers_1_mlp_up_proj_weight_palettized, x = input_23)[name = string("up_states_3")]; + tensor gate_states_3 = silu(x = input_25)[name = string("gate_states_3")]; + tensor input_27 = mul(x = gate_states_3, y = up_states_3)[name = string("input_27")]; + string hidden_states_15_pad_type_0 = const()[name = string("hidden_states_15_pad_type_0"), val = string("valid")]; + tensor hidden_states_15_strides_0 = const()[name = string("hidden_states_15_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_15_pad_0 = const()[name = string("hidden_states_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_15_dilations_0 = const()[name = string("hidden_states_15_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_15_groups_0 = const()[name = string("hidden_states_15_groups_0"), val = int32(1)]; + tensor hidden_states_15 = conv(dilations = hidden_states_15_dilations_0, groups = hidden_states_15_groups_0, pad = hidden_states_15_pad_0, pad_type = hidden_states_15_pad_type_0, strides = hidden_states_15_strides_0, weight = model_model_layers_1_mlp_down_proj_weight_palettized, x = input_27)[name = string("hidden_states_15")]; + tensor var_662_axes_0 = const()[name = string("op_662_axes_0"), val = tensor([2])]; + tensor var_662 = squeeze(axes = var_662_axes_0, x = hidden_states_15)[name = string("op_662")]; + tensor var_663 = const()[name = string("op_663"), val = tensor([0, 2, 1])]; + tensor var_664 = transpose(perm = var_663, x = var_662)[name = string("transpose_42")]; + tensor hidden_states_17_cast_fp16 = add(x = hidden_states_13_cast_fp16, y = var_664)[name = string("hidden_states_17_cast_fp16")]; + tensor mean_9_axes_0 = const()[name = string("mean_9_axes_0"), val = tensor([-1])]; + bool mean_9_keep_dims_0 = const()[name = string("mean_9_keep_dims_0"), val = bool(true)]; + tensor mean_9_cast_fp16 = reduce_mean(axes = mean_9_axes_0, keep_dims = mean_9_keep_dims_0, x = hidden_states_17_cast_fp16)[name = string("mean_9_cast_fp16")]; + tensor input_29_cast_fp16 = sub(x = hidden_states_17_cast_fp16, y = mean_9_cast_fp16)[name = string("input_29_cast_fp16")]; + tensor var_672_axes_0 = const()[name = string("op_672_axes_0"), val = tensor([-1])]; + tensor model_model_layers_2_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_2_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(264973056)))]; + tensor var_672_cast_fp16 = layer_norm(axes = var_672_axes_0, epsilon = var_47_to_fp16, gamma = model_model_layers_2_input_layernorm_weight_to_fp16, x = input_29_cast_fp16)[name = string("op_672_cast_fp16")]; + tensor var_676 = const()[name = string("op_676"), val = tensor([0, 2, 1])]; + tensor var_678_axes_0 = const()[name = string("op_678_axes_0"), val = tensor([2])]; + tensor var_677 = transpose(perm = var_676, x = var_672_cast_fp16)[name = string("transpose_41")]; + tensor var_678 = expand_dims(axes = var_678_axes_0, x = var_677)[name = string("op_678")]; + string query_states_9_pad_type_0 = const()[name = string("query_states_9_pad_type_0"), val = string("valid")]; + tensor query_states_9_strides_0 = const()[name = string("query_states_9_strides_0"), val = tensor([1, 1])]; + tensor query_states_9_pad_0 = const()[name = string("query_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_9_dilations_0 = const()[name = string("query_states_9_dilations_0"), val = tensor([1, 1])]; + int32 query_states_9_groups_0 = const()[name = string("query_states_9_groups_0"), val = int32(1)]; + tensor query_states_9 = conv(dilations = query_states_9_dilations_0, groups = query_states_9_groups_0, pad = query_states_9_pad_0, pad_type = query_states_9_pad_type_0, strides = query_states_9_strides_0, weight = model_model_layers_2_self_attn_q_proj_weight_palettized, x = var_678)[name = string("query_states_9")]; + string key_states_13_pad_type_0 = const()[name = string("key_states_13_pad_type_0"), val = string("valid")]; + tensor key_states_13_strides_0 = const()[name = string("key_states_13_strides_0"), val = tensor([1, 1])]; + tensor key_states_13_pad_0 = const()[name = string("key_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_13_dilations_0 = const()[name = string("key_states_13_dilations_0"), val = tensor([1, 1])]; + int32 key_states_13_groups_0 = const()[name = string("key_states_13_groups_0"), val = int32(1)]; + tensor key_states_13 = conv(dilations = key_states_13_dilations_0, groups = key_states_13_groups_0, pad = key_states_13_pad_0, pad_type = key_states_13_pad_type_0, strides = key_states_13_strides_0, weight = model_model_layers_2_self_attn_k_proj_weight_palettized, x = var_678)[name = string("key_states_13")]; + string value_states_13_pad_type_0 = const()[name = string("value_states_13_pad_type_0"), val = string("valid")]; + tensor value_states_13_strides_0 = const()[name = string("value_states_13_strides_0"), val = tensor([1, 1])]; + tensor value_states_13_pad_0 = const()[name = string("value_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_13_dilations_0 = const()[name = string("value_states_13_dilations_0"), val = tensor([1, 1])]; + int32 value_states_13_groups_0 = const()[name = string("value_states_13_groups_0"), val = int32(1)]; + tensor value_states_13 = conv(dilations = value_states_13_dilations_0, groups = value_states_13_groups_0, pad = value_states_13_pad_0, pad_type = value_states_13_pad_type_0, strides = value_states_13_strides_0, weight = model_model_layers_2_self_attn_v_proj_weight_palettized, x = var_678)[name = string("value_states_13")]; + tensor var_698 = const()[name = string("op_698"), val = tensor([1, 32, 64, 64])]; + tensor var_699 = reshape(shape = var_698, x = query_states_9)[name = string("op_699")]; + tensor var_700 = const()[name = string("op_700"), val = tensor([0, 1, 3, 2])]; + tensor var_702 = const()[name = string("op_702"), val = tensor([1, 8, 64, 64])]; + tensor var_703 = reshape(shape = var_702, x = key_states_13)[name = string("op_703")]; + tensor var_704 = const()[name = string("op_704"), val = tensor([0, 1, 3, 2])]; + tensor var_706 = const()[name = string("op_706"), val = tensor([1, 8, 64, 64])]; + tensor var_707 = reshape(shape = var_706, x = value_states_13)[name = string("op_707")]; + tensor var_708 = const()[name = string("op_708"), val = tensor([0, 1, 3, 2])]; + tensor x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor([1, 32, 64, 32])]; + tensor x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_57 = transpose(perm = var_700, x = var_699)[name = string("transpose_40")]; + tensor x1_9 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = x_57)[name = string("x1_9")]; + tensor x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor([1, 32, 64, 64])]; + tensor x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_9 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = x_57)[name = string("x2_9")]; + tensor var_726 = mul(x = x1_9, y = cos_7)[name = string("op_726")]; + tensor var_727 = mul(x = x2_9, y = sin_7)[name = string("op_727")]; + tensor var_728 = sub(x = var_726, y = var_727)[name = string("op_728")]; + tensor var_729 = mul(x = x2_9, y = cos_7)[name = string("op_729")]; + tensor var_730 = mul(x = x1_9, y = sin_7)[name = string("op_730")]; + tensor var_731 = add(x = var_729, y = var_730)[name = string("op_731")]; + bool rotated_9_interleave_0 = const()[name = string("rotated_9_interleave_0"), val = bool(false)]; + tensor rotated_9 = concat(axis = var_45, interleave = rotated_9_interleave_0, values = (var_728, var_731))[name = string("rotated_9")]; + tensor x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor([1, 8, 64, 32])]; + tensor x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_61 = transpose(perm = var_704, x = var_703)[name = string("transpose_39")]; + tensor x1_11 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = x_61)[name = string("x1_11")]; + tensor x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_11 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = x_61)[name = string("x2_11")]; + tensor var_747 = mul(x = x1_11, y = cos_7)[name = string("op_747")]; + tensor var_748 = mul(x = x2_11, y = sin_7)[name = string("op_748")]; + tensor var_749 = sub(x = var_747, y = var_748)[name = string("op_749")]; + tensor var_750 = mul(x = x2_11, y = cos_7)[name = string("op_750")]; + tensor var_751 = mul(x = x1_11, y = sin_7)[name = string("op_751")]; + tensor var_752 = add(x = var_750, y = var_751)[name = string("op_752")]; + bool rotated_11_interleave_0 = const()[name = string("rotated_11_interleave_0"), val = bool(false)]; + tensor rotated_11 = concat(axis = var_45, interleave = rotated_11_interleave_0, values = (var_749, var_752))[name = string("rotated_11")]; + tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([2])]; + tensor expand_dims_25 = const()[name = string("expand_dims_25"), val = tensor([0])]; + tensor expand_dims_27 = const()[name = string("expand_dims_27"), val = tensor([0])]; + tensor expand_dims_28 = const()[name = string("expand_dims_28"), val = tensor([3])]; + int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)]; + bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)]; + tensor concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (expand_dims_24, expand_dims_25, current_pos, expand_dims_27))[name = string("concat_38")]; + tensor concat_39_values1_0 = const()[name = string("concat_39_values1_0"), val = tensor([0])]; + tensor concat_39_values3_0 = const()[name = string("concat_39_values3_0"), val = tensor([0])]; + int32 concat_39_axis_0 = const()[name = string("concat_39_axis_0"), val = int32(0)]; + bool concat_39_interleave_0 = const()[name = string("concat_39_interleave_0"), val = bool(false)]; + tensor concat_39 = concat(axis = concat_39_axis_0, interleave = concat_39_interleave_0, values = (expand_dims_28, concat_39_values1_0, var_367, concat_39_values3_0))[name = string("concat_39")]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_38, begin_mask = model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0, end = concat_39, end_mask = model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_5_stride_0, update = rotated_11, x = coreml_update_state_19)[name = string("model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_20_write_state")]; + tensor coreml_update_state_20 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_20")]; + tensor expand_dims_30 = const()[name = string("expand_dims_30"), val = tensor([18])]; + tensor expand_dims_31 = const()[name = string("expand_dims_31"), val = tensor([0])]; + tensor expand_dims_33 = const()[name = string("expand_dims_33"), val = tensor([0])]; + tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([19])]; + int32 concat_42_axis_0 = const()[name = string("concat_42_axis_0"), val = int32(0)]; + bool concat_42_interleave_0 = const()[name = string("concat_42_interleave_0"), val = bool(false)]; + tensor concat_42 = concat(axis = concat_42_axis_0, interleave = concat_42_interleave_0, values = (expand_dims_30, expand_dims_31, current_pos, expand_dims_33))[name = string("concat_42")]; + tensor concat_43_values1_0 = const()[name = string("concat_43_values1_0"), val = tensor([0])]; + tensor concat_43_values3_0 = const()[name = string("concat_43_values3_0"), val = tensor([0])]; + int32 concat_43_axis_0 = const()[name = string("concat_43_axis_0"), val = int32(0)]; + bool concat_43_interleave_0 = const()[name = string("concat_43_interleave_0"), val = bool(false)]; + tensor concat_43 = concat(axis = concat_43_axis_0, interleave = concat_43_interleave_0, values = (expand_dims_34, concat_43_values1_0, var_367, concat_43_values3_0))[name = string("concat_43")]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_15 = transpose(perm = var_708, x = var_707)[name = string("transpose_38")]; + tensor model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_42, begin_mask = model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0, end = concat_43, end_mask = model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_6_stride_0, update = value_states_15, x = coreml_update_state_20)[name = string("model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_21_write_state")]; + tensor coreml_update_state_21 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_21")]; + tensor var_775_begin_0 = const()[name = string("op_775_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor var_775_end_0 = const()[name = string("op_775_end_0"), val = tensor([3, 8, 1546, 64])]; + tensor var_775_end_mask_0 = const()[name = string("op_775_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_775_cast_fp16 = slice_by_index(begin = var_775_begin_0, end = var_775_end_0, end_mask = var_775_end_mask_0, x = coreml_update_state_21)[name = string("op_775_cast_fp16")]; + tensor K_layer_cache_5_axes_0 = const()[name = string("K_layer_cache_5_axes_0"), val = tensor([0])]; + tensor K_layer_cache_5_cast_fp16 = squeeze(axes = K_layer_cache_5_axes_0, x = var_775_cast_fp16)[name = string("K_layer_cache_5_cast_fp16")]; + tensor var_777_begin_0 = const()[name = string("op_777_begin_0"), val = tensor([18, 0, 0, 0])]; + tensor var_777_end_0 = const()[name = string("op_777_end_0"), val = tensor([19, 8, 1546, 64])]; + tensor var_777_end_mask_0 = const()[name = string("op_777_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_777_cast_fp16 = slice_by_index(begin = var_777_begin_0, end = var_777_end_0, end_mask = var_777_end_mask_0, x = coreml_update_state_21)[name = string("op_777_cast_fp16")]; + tensor V_layer_cache_5_axes_0 = const()[name = string("V_layer_cache_5_axes_0"), val = tensor([0])]; + tensor V_layer_cache_5_cast_fp16 = squeeze(axes = V_layer_cache_5_axes_0, x = var_777_cast_fp16)[name = string("V_layer_cache_5_cast_fp16")]; + tensor x_67_axes_0 = const()[name = string("x_67_axes_0"), val = tensor([1])]; + tensor x_67_cast_fp16 = expand_dims(axes = x_67_axes_0, x = K_layer_cache_5_cast_fp16)[name = string("x_67_cast_fp16")]; + tensor var_786 = const()[name = string("op_786"), val = tensor([1, 4, 1, 1])]; + tensor x_69_cast_fp16 = tile(reps = var_786, x = x_67_cast_fp16)[name = string("x_69_cast_fp16")]; + tensor var_790 = const()[name = string("op_790"), val = tensor([1, -1, 1546, 64])]; + tensor var_791_cast_fp16 = reshape(shape = var_790, x = x_69_cast_fp16)[name = string("op_791_cast_fp16")]; + tensor x_73_axes_0 = const()[name = string("x_73_axes_0"), val = tensor([1])]; + tensor x_73_cast_fp16 = expand_dims(axes = x_73_axes_0, x = V_layer_cache_5_cast_fp16)[name = string("x_73_cast_fp16")]; + tensor var_793 = const()[name = string("op_793"), val = tensor([1, 4, 1, 1])]; + tensor x_75_cast_fp16 = tile(reps = var_793, x = x_73_cast_fp16)[name = string("x_75_cast_fp16")]; + bool var_800_transpose_x_0 = const()[name = string("op_800_transpose_x_0"), val = bool(false)]; + bool var_800_transpose_y_0 = const()[name = string("op_800_transpose_y_0"), val = bool(true)]; + tensor var_800_cast_fp16 = matmul(transpose_x = var_800_transpose_x_0, transpose_y = var_800_transpose_y_0, x = rotated_9, y = var_791_cast_fp16)[name = string("op_800_cast_fp16")]; + fp16 var_801_to_fp16 = const()[name = string("op_801_to_fp16"), val = fp16(0x1p-3)]; + tensor attn_weights_5_cast_fp16 = mul(x = var_800_cast_fp16, y = var_801_to_fp16)[name = string("attn_weights_5_cast_fp16")]; + tensor x_77_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = causal_mask)[name = string("x_77_cast_fp16")]; + tensor reduce_max_2_axes_0 = const()[name = string("reduce_max_2_axes_0"), val = tensor([-1])]; + bool reduce_max_2_keep_dims_0 = const()[name = string("reduce_max_2_keep_dims_0"), val = bool(true)]; + tensor reduce_max_2_cast_fp16 = reduce_max(axes = reduce_max_2_axes_0, keep_dims = reduce_max_2_keep_dims_0, x = x_77_cast_fp16)[name = string("reduce_max_2_cast_fp16")]; + tensor x_79_cast_fp16 = sub(x = x_77_cast_fp16, y = reduce_max_2_cast_fp16)[name = string("x_79_cast_fp16")]; + tensor exp_x_5_cast_fp16 = exp(x = x_79_cast_fp16)[name = string("exp_x_5_cast_fp16")]; + tensor var_812_axes_0 = const()[name = string("op_812_axes_0"), val = tensor([-1])]; + bool var_812_keep_dims_0 = const()[name = string("op_812_keep_dims_0"), val = bool(true)]; + tensor var_812_cast_fp16 = reduce_sum(axes = var_812_axes_0, keep_dims = var_812_keep_dims_0, x = exp_x_5_cast_fp16)[name = string("op_812_cast_fp16")]; + tensor var_813_cast_fp16 = real_div(x = exp_x_5_cast_fp16, y = var_812_cast_fp16)[name = string("op_813_cast_fp16")]; + tensor concat_48 = const()[name = string("concat_48"), val = tensor([32, 64, 1546])]; + tensor reshape_6_cast_fp16 = reshape(shape = concat_48, x = var_813_cast_fp16)[name = string("reshape_6_cast_fp16")]; + tensor concat_49 = const()[name = string("concat_49"), val = tensor([32, 1546, 64])]; + tensor reshape_7_cast_fp16 = reshape(shape = concat_49, x = x_75_cast_fp16)[name = string("reshape_7_cast_fp16")]; + bool matmul_2_transpose_x_0 = const()[name = string("matmul_2_transpose_x_0"), val = bool(false)]; + bool matmul_2_transpose_y_0 = const()[name = string("matmul_2_transpose_y_0"), val = bool(false)]; + tensor matmul_2_cast_fp16 = matmul(transpose_x = matmul_2_transpose_x_0, transpose_y = matmul_2_transpose_y_0, x = reshape_6_cast_fp16, y = reshape_7_cast_fp16)[name = string("matmul_2_cast_fp16")]; + tensor concat_53 = const()[name = string("concat_53"), val = tensor([1, 32, 64, 64])]; + tensor reshape_8_cast_fp16 = reshape(shape = concat_53, x = matmul_2_cast_fp16)[name = string("reshape_8_cast_fp16")]; + tensor var_816_perm_0 = const()[name = string("op_816_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_818 = const()[name = string("op_818"), val = tensor([1, 64, 2048])]; + tensor var_816_cast_fp16 = transpose(perm = var_816_perm_0, x = reshape_8_cast_fp16)[name = string("transpose_37")]; + tensor input_33_cast_fp16 = reshape(shape = var_818, x = var_816_cast_fp16)[name = string("input_33_cast_fp16")]; + tensor model_model_layers_2_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(264977216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267074432))))[name = string("model_model_layers_2_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_2_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_2_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_33_cast_fp16)[name = string("linear_2_cast_fp16")]; + tensor hidden_states_21_cast_fp16 = add(x = hidden_states_17_cast_fp16, y = linear_2_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; + tensor mean_11_axes_0 = const()[name = string("mean_11_axes_0"), val = tensor([-1])]; + bool mean_11_keep_dims_0 = const()[name = string("mean_11_keep_dims_0"), val = bool(true)]; + tensor mean_11_cast_fp16 = reduce_mean(axes = mean_11_axes_0, keep_dims = mean_11_keep_dims_0, x = hidden_states_21_cast_fp16)[name = string("mean_11_cast_fp16")]; + tensor input_35_cast_fp16 = sub(x = hidden_states_21_cast_fp16, y = mean_11_cast_fp16)[name = string("input_35_cast_fp16")]; + tensor var_829_axes_0 = const()[name = string("op_829_axes_0"), val = tensor([-1])]; + tensor model_model_layers_2_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_2_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267082688)))]; + tensor var_829_cast_fp16 = layer_norm(axes = var_829_axes_0, epsilon = var_47_to_fp16, gamma = model_model_layers_2_post_attention_layernorm_weight_to_fp16, x = input_35_cast_fp16)[name = string("op_829_cast_fp16")]; + tensor var_836 = const()[name = string("op_836"), val = tensor([0, 2, 1])]; + tensor input_37_axes_0 = const()[name = string("input_37_axes_0"), val = tensor([2])]; + tensor var_837 = transpose(perm = var_836, x = var_829_cast_fp16)[name = string("transpose_36")]; + tensor input_37 = expand_dims(axes = input_37_axes_0, x = var_837)[name = string("input_37")]; + string input_39_pad_type_0 = const()[name = string("input_39_pad_type_0"), val = string("valid")]; + tensor input_39_strides_0 = const()[name = string("input_39_strides_0"), val = tensor([1, 1])]; + tensor input_39_pad_0 = const()[name = string("input_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_39_dilations_0 = const()[name = string("input_39_dilations_0"), val = tensor([1, 1])]; + int32 input_39_groups_0 = const()[name = string("input_39_groups_0"), val = int32(1)]; + tensor input_39 = conv(dilations = input_39_dilations_0, groups = input_39_groups_0, pad = input_39_pad_0, pad_type = input_39_pad_type_0, strides = input_39_strides_0, weight = model_model_layers_2_mlp_gate_proj_weight_palettized, x = input_37)[name = string("input_39")]; + string up_states_5_pad_type_0 = const()[name = string("up_states_5_pad_type_0"), val = string("valid")]; + tensor up_states_5_strides_0 = const()[name = string("up_states_5_strides_0"), val = tensor([1, 1])]; + tensor up_states_5_pad_0 = const()[name = string("up_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_5_dilations_0 = const()[name = string("up_states_5_dilations_0"), val = tensor([1, 1])]; + int32 up_states_5_groups_0 = const()[name = string("up_states_5_groups_0"), val = int32(1)]; + tensor up_states_5 = conv(dilations = up_states_5_dilations_0, groups = up_states_5_groups_0, pad = up_states_5_pad_0, pad_type = up_states_5_pad_type_0, strides = up_states_5_strides_0, weight = model_model_layers_2_mlp_up_proj_weight_palettized, x = input_37)[name = string("up_states_5")]; + tensor gate_states_5 = silu(x = input_39)[name = string("gate_states_5")]; + tensor input_41 = mul(x = gate_states_5, y = up_states_5)[name = string("input_41")]; + string hidden_states_23_pad_type_0 = const()[name = string("hidden_states_23_pad_type_0"), val = string("valid")]; + tensor hidden_states_23_strides_0 = const()[name = string("hidden_states_23_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_23_pad_0 = const()[name = string("hidden_states_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_23_dilations_0 = const()[name = string("hidden_states_23_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_23_groups_0 = const()[name = string("hidden_states_23_groups_0"), val = int32(1)]; + tensor hidden_states_23 = conv(dilations = hidden_states_23_dilations_0, groups = hidden_states_23_groups_0, pad = hidden_states_23_pad_0, pad_type = hidden_states_23_pad_type_0, strides = hidden_states_23_strides_0, weight = model_model_layers_2_mlp_down_proj_weight_palettized, x = input_41)[name = string("hidden_states_23")]; + tensor var_859_axes_0 = const()[name = string("op_859_axes_0"), val = tensor([2])]; + tensor var_859 = squeeze(axes = var_859_axes_0, x = hidden_states_23)[name = string("op_859")]; + tensor var_860 = const()[name = string("op_860"), val = tensor([0, 2, 1])]; + tensor var_861 = transpose(perm = var_860, x = var_859)[name = string("transpose_35")]; + tensor hidden_states_25_cast_fp16 = add(x = hidden_states_21_cast_fp16, y = var_861)[name = string("hidden_states_25_cast_fp16")]; + tensor mean_13_axes_0 = const()[name = string("mean_13_axes_0"), val = tensor([-1])]; + bool mean_13_keep_dims_0 = const()[name = string("mean_13_keep_dims_0"), val = bool(true)]; + tensor mean_13_cast_fp16 = reduce_mean(axes = mean_13_axes_0, keep_dims = mean_13_keep_dims_0, x = hidden_states_25_cast_fp16)[name = string("mean_13_cast_fp16")]; + tensor input_43_cast_fp16 = sub(x = hidden_states_25_cast_fp16, y = mean_13_cast_fp16)[name = string("input_43_cast_fp16")]; + tensor var_869_axes_0 = const()[name = string("op_869_axes_0"), val = tensor([-1])]; + tensor model_model_layers_3_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_3_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267086848)))]; + tensor var_869_cast_fp16 = layer_norm(axes = var_869_axes_0, epsilon = var_47_to_fp16, gamma = model_model_layers_3_input_layernorm_weight_to_fp16, x = input_43_cast_fp16)[name = string("op_869_cast_fp16")]; + tensor var_873 = const()[name = string("op_873"), val = tensor([0, 2, 1])]; + tensor var_875_axes_0 = const()[name = string("op_875_axes_0"), val = tensor([2])]; + tensor var_874 = transpose(perm = var_873, x = var_869_cast_fp16)[name = string("transpose_34")]; + tensor var_875 = expand_dims(axes = var_875_axes_0, x = var_874)[name = string("op_875")]; + string query_states_13_pad_type_0 = const()[name = string("query_states_13_pad_type_0"), val = string("valid")]; + tensor query_states_13_strides_0 = const()[name = string("query_states_13_strides_0"), val = tensor([1, 1])]; + tensor query_states_13_pad_0 = const()[name = string("query_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_13_dilations_0 = const()[name = string("query_states_13_dilations_0"), val = tensor([1, 1])]; + int32 query_states_13_groups_0 = const()[name = string("query_states_13_groups_0"), val = int32(1)]; + tensor query_states_13 = conv(dilations = query_states_13_dilations_0, groups = query_states_13_groups_0, pad = query_states_13_pad_0, pad_type = query_states_13_pad_type_0, strides = query_states_13_strides_0, weight = model_model_layers_3_self_attn_q_proj_weight_palettized, x = var_875)[name = string("query_states_13")]; + string key_states_19_pad_type_0 = const()[name = string("key_states_19_pad_type_0"), val = string("valid")]; + tensor key_states_19_strides_0 = const()[name = string("key_states_19_strides_0"), val = tensor([1, 1])]; + tensor key_states_19_pad_0 = const()[name = string("key_states_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_19_dilations_0 = const()[name = string("key_states_19_dilations_0"), val = tensor([1, 1])]; + int32 key_states_19_groups_0 = const()[name = string("key_states_19_groups_0"), val = int32(1)]; + tensor key_states_19 = conv(dilations = key_states_19_dilations_0, groups = key_states_19_groups_0, pad = key_states_19_pad_0, pad_type = key_states_19_pad_type_0, strides = key_states_19_strides_0, weight = model_model_layers_3_self_attn_k_proj_weight_palettized, x = var_875)[name = string("key_states_19")]; + string value_states_19_pad_type_0 = const()[name = string("value_states_19_pad_type_0"), val = string("valid")]; + tensor value_states_19_strides_0 = const()[name = string("value_states_19_strides_0"), val = tensor([1, 1])]; + tensor value_states_19_pad_0 = const()[name = string("value_states_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_19_dilations_0 = const()[name = string("value_states_19_dilations_0"), val = tensor([1, 1])]; + int32 value_states_19_groups_0 = const()[name = string("value_states_19_groups_0"), val = int32(1)]; + tensor value_states_19 = conv(dilations = value_states_19_dilations_0, groups = value_states_19_groups_0, pad = value_states_19_pad_0, pad_type = value_states_19_pad_type_0, strides = value_states_19_strides_0, weight = model_model_layers_3_self_attn_v_proj_weight_palettized, x = var_875)[name = string("value_states_19")]; + tensor var_895 = const()[name = string("op_895"), val = tensor([1, 32, 64, 64])]; + tensor var_896 = reshape(shape = var_895, x = query_states_13)[name = string("op_896")]; + tensor var_897 = const()[name = string("op_897"), val = tensor([0, 1, 3, 2])]; + tensor var_899 = const()[name = string("op_899"), val = tensor([1, 8, 64, 64])]; + tensor var_900 = reshape(shape = var_899, x = key_states_19)[name = string("op_900")]; + tensor var_901 = const()[name = string("op_901"), val = tensor([0, 1, 3, 2])]; + tensor var_903 = const()[name = string("op_903"), val = tensor([1, 8, 64, 64])]; + tensor var_904 = reshape(shape = var_903, x = value_states_19)[name = string("op_904")]; + tensor var_905 = const()[name = string("op_905"), val = tensor([0, 1, 3, 2])]; + tensor x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor([1, 32, 64, 32])]; + tensor x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_85 = transpose(perm = var_897, x = var_896)[name = string("transpose_33")]; + tensor x1_13 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = x_85)[name = string("x1_13")]; + tensor x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor([1, 32, 64, 64])]; + tensor x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_13 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = x_85)[name = string("x2_13")]; + tensor var_923 = mul(x = x1_13, y = cos_7)[name = string("op_923")]; + tensor var_924 = mul(x = x2_13, y = sin_7)[name = string("op_924")]; + tensor var_925 = sub(x = var_923, y = var_924)[name = string("op_925")]; + tensor var_926 = mul(x = x2_13, y = cos_7)[name = string("op_926")]; + tensor var_927 = mul(x = x1_13, y = sin_7)[name = string("op_927")]; + tensor var_928 = add(x = var_926, y = var_927)[name = string("op_928")]; + bool rotated_13_interleave_0 = const()[name = string("rotated_13_interleave_0"), val = bool(false)]; + tensor rotated_13 = concat(axis = var_45, interleave = rotated_13_interleave_0, values = (var_925, var_928))[name = string("rotated_13")]; + tensor x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor([1, 8, 64, 32])]; + tensor x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_89 = transpose(perm = var_901, x = var_900)[name = string("transpose_32")]; + tensor x1_15 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = x_89)[name = string("x1_15")]; + tensor x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_15 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = x_89)[name = string("x2_15")]; + tensor var_944 = mul(x = x1_15, y = cos_7)[name = string("op_944")]; + tensor var_945 = mul(x = x2_15, y = sin_7)[name = string("op_945")]; + tensor var_946 = sub(x = var_944, y = var_945)[name = string("op_946")]; + tensor var_947 = mul(x = x2_15, y = cos_7)[name = string("op_947")]; + tensor var_948 = mul(x = x1_15, y = sin_7)[name = string("op_948")]; + tensor var_949 = add(x = var_947, y = var_948)[name = string("op_949")]; + bool rotated_15_interleave_0 = const()[name = string("rotated_15_interleave_0"), val = bool(false)]; + tensor rotated_15 = concat(axis = var_45, interleave = rotated_15_interleave_0, values = (var_946, var_949))[name = string("rotated_15")]; + tensor expand_dims_36 = const()[name = string("expand_dims_36"), val = tensor([3])]; + tensor expand_dims_37 = const()[name = string("expand_dims_37"), val = tensor([0])]; + tensor expand_dims_39 = const()[name = string("expand_dims_39"), val = tensor([0])]; + tensor expand_dims_40 = const()[name = string("expand_dims_40"), val = tensor([4])]; + int32 concat_56_axis_0 = const()[name = string("concat_56_axis_0"), val = int32(0)]; + bool concat_56_interleave_0 = const()[name = string("concat_56_interleave_0"), val = bool(false)]; + tensor concat_56 = concat(axis = concat_56_axis_0, interleave = concat_56_interleave_0, values = (expand_dims_36, expand_dims_37, current_pos, expand_dims_39))[name = string("concat_56")]; + tensor concat_57_values1_0 = const()[name = string("concat_57_values1_0"), val = tensor([0])]; + tensor concat_57_values3_0 = const()[name = string("concat_57_values3_0"), val = tensor([0])]; + int32 concat_57_axis_0 = const()[name = string("concat_57_axis_0"), val = int32(0)]; + bool concat_57_interleave_0 = const()[name = string("concat_57_interleave_0"), val = bool(false)]; + tensor concat_57 = concat(axis = concat_57_axis_0, interleave = concat_57_interleave_0, values = (expand_dims_40, concat_57_values1_0, var_367, concat_57_values3_0))[name = string("concat_57")]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_56, begin_mask = model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0, end = concat_57, end_mask = model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_7_stride_0, update = rotated_15, x = coreml_update_state_21)[name = string("model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_22_write_state")]; + tensor coreml_update_state_22 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_22")]; + tensor expand_dims_42 = const()[name = string("expand_dims_42"), val = tensor([19])]; + tensor expand_dims_43 = const()[name = string("expand_dims_43"), val = tensor([0])]; + tensor expand_dims_45 = const()[name = string("expand_dims_45"), val = tensor([0])]; + tensor expand_dims_46 = const()[name = string("expand_dims_46"), val = tensor([20])]; + int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)]; + bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)]; + tensor concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (expand_dims_42, expand_dims_43, current_pos, expand_dims_45))[name = string("concat_60")]; + tensor concat_61_values1_0 = const()[name = string("concat_61_values1_0"), val = tensor([0])]; + tensor concat_61_values3_0 = const()[name = string("concat_61_values3_0"), val = tensor([0])]; + int32 concat_61_axis_0 = const()[name = string("concat_61_axis_0"), val = int32(0)]; + bool concat_61_interleave_0 = const()[name = string("concat_61_interleave_0"), val = bool(false)]; + tensor concat_61 = concat(axis = concat_61_axis_0, interleave = concat_61_interleave_0, values = (expand_dims_46, concat_61_values1_0, var_367, concat_61_values3_0))[name = string("concat_61")]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_21 = transpose(perm = var_905, x = var_904)[name = string("transpose_31")]; + tensor model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_60, begin_mask = model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0, end = concat_61, end_mask = model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_8_stride_0, update = value_states_21, x = coreml_update_state_22)[name = string("model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_23_write_state")]; + tensor coreml_update_state_23 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_23")]; + tensor var_972_begin_0 = const()[name = string("op_972_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor var_972_end_0 = const()[name = string("op_972_end_0"), val = tensor([4, 8, 1546, 64])]; + tensor var_972_end_mask_0 = const()[name = string("op_972_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_972_cast_fp16 = slice_by_index(begin = var_972_begin_0, end = var_972_end_0, end_mask = var_972_end_mask_0, x = coreml_update_state_23)[name = string("op_972_cast_fp16")]; + tensor K_layer_cache_7_axes_0 = const()[name = string("K_layer_cache_7_axes_0"), val = tensor([0])]; + tensor K_layer_cache_7_cast_fp16 = squeeze(axes = K_layer_cache_7_axes_0, x = var_972_cast_fp16)[name = string("K_layer_cache_7_cast_fp16")]; + tensor var_974_begin_0 = const()[name = string("op_974_begin_0"), val = tensor([19, 0, 0, 0])]; + tensor var_974_end_0 = const()[name = string("op_974_end_0"), val = tensor([20, 8, 1546, 64])]; + tensor var_974_end_mask_0 = const()[name = string("op_974_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_974_cast_fp16 = slice_by_index(begin = var_974_begin_0, end = var_974_end_0, end_mask = var_974_end_mask_0, x = coreml_update_state_23)[name = string("op_974_cast_fp16")]; + tensor V_layer_cache_7_axes_0 = const()[name = string("V_layer_cache_7_axes_0"), val = tensor([0])]; + tensor V_layer_cache_7_cast_fp16 = squeeze(axes = V_layer_cache_7_axes_0, x = var_974_cast_fp16)[name = string("V_layer_cache_7_cast_fp16")]; + tensor x_95_axes_0 = const()[name = string("x_95_axes_0"), val = tensor([1])]; + tensor x_95_cast_fp16 = expand_dims(axes = x_95_axes_0, x = K_layer_cache_7_cast_fp16)[name = string("x_95_cast_fp16")]; + tensor var_983 = const()[name = string("op_983"), val = tensor([1, 4, 1, 1])]; + tensor x_97_cast_fp16 = tile(reps = var_983, x = x_95_cast_fp16)[name = string("x_97_cast_fp16")]; + tensor var_987 = const()[name = string("op_987"), val = tensor([1, -1, 1546, 64])]; + tensor var_988_cast_fp16 = reshape(shape = var_987, x = x_97_cast_fp16)[name = string("op_988_cast_fp16")]; + tensor x_101_axes_0 = const()[name = string("x_101_axes_0"), val = tensor([1])]; + tensor x_101_cast_fp16 = expand_dims(axes = x_101_axes_0, x = V_layer_cache_7_cast_fp16)[name = string("x_101_cast_fp16")]; + tensor var_990 = const()[name = string("op_990"), val = tensor([1, 4, 1, 1])]; + tensor x_103_cast_fp16 = tile(reps = var_990, x = x_101_cast_fp16)[name = string("x_103_cast_fp16")]; + bool var_997_transpose_x_0 = const()[name = string("op_997_transpose_x_0"), val = bool(false)]; + bool var_997_transpose_y_0 = const()[name = string("op_997_transpose_y_0"), val = bool(true)]; + tensor var_997_cast_fp16 = matmul(transpose_x = var_997_transpose_x_0, transpose_y = var_997_transpose_y_0, x = rotated_13, y = var_988_cast_fp16)[name = string("op_997_cast_fp16")]; + fp16 var_998_to_fp16 = const()[name = string("op_998_to_fp16"), val = fp16(0x1p-3)]; + tensor attn_weights_7_cast_fp16 = mul(x = var_997_cast_fp16, y = var_998_to_fp16)[name = string("attn_weights_7_cast_fp16")]; + tensor x_105_cast_fp16 = add(x = attn_weights_7_cast_fp16, y = causal_mask)[name = string("x_105_cast_fp16")]; + tensor reduce_max_3_axes_0 = const()[name = string("reduce_max_3_axes_0"), val = tensor([-1])]; + bool reduce_max_3_keep_dims_0 = const()[name = string("reduce_max_3_keep_dims_0"), val = bool(true)]; + tensor reduce_max_3_cast_fp16 = reduce_max(axes = reduce_max_3_axes_0, keep_dims = reduce_max_3_keep_dims_0, x = x_105_cast_fp16)[name = string("reduce_max_3_cast_fp16")]; + tensor x_107_cast_fp16 = sub(x = x_105_cast_fp16, y = reduce_max_3_cast_fp16)[name = string("x_107_cast_fp16")]; + tensor exp_x_7_cast_fp16 = exp(x = x_107_cast_fp16)[name = string("exp_x_7_cast_fp16")]; + tensor var_1009_axes_0 = const()[name = string("op_1009_axes_0"), val = tensor([-1])]; + bool var_1009_keep_dims_0 = const()[name = string("op_1009_keep_dims_0"), val = bool(true)]; + tensor var_1009_cast_fp16 = reduce_sum(axes = var_1009_axes_0, keep_dims = var_1009_keep_dims_0, x = exp_x_7_cast_fp16)[name = string("op_1009_cast_fp16")]; + tensor var_1010_cast_fp16 = real_div(x = exp_x_7_cast_fp16, y = var_1009_cast_fp16)[name = string("op_1010_cast_fp16")]; + tensor concat_66 = const()[name = string("concat_66"), val = tensor([32, 64, 1546])]; + tensor reshape_9_cast_fp16 = reshape(shape = concat_66, x = var_1010_cast_fp16)[name = string("reshape_9_cast_fp16")]; + tensor concat_67 = const()[name = string("concat_67"), val = tensor([32, 1546, 64])]; + tensor reshape_10_cast_fp16 = reshape(shape = concat_67, x = x_103_cast_fp16)[name = string("reshape_10_cast_fp16")]; + bool matmul_3_transpose_x_0 = const()[name = string("matmul_3_transpose_x_0"), val = bool(false)]; + bool matmul_3_transpose_y_0 = const()[name = string("matmul_3_transpose_y_0"), val = bool(false)]; + tensor matmul_3_cast_fp16 = matmul(transpose_x = matmul_3_transpose_x_0, transpose_y = matmul_3_transpose_y_0, x = reshape_9_cast_fp16, y = reshape_10_cast_fp16)[name = string("matmul_3_cast_fp16")]; + tensor concat_71 = const()[name = string("concat_71"), val = tensor([1, 32, 64, 64])]; + tensor reshape_11_cast_fp16 = reshape(shape = concat_71, x = matmul_3_cast_fp16)[name = string("reshape_11_cast_fp16")]; + tensor var_1013_perm_0 = const()[name = string("op_1013_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1015 = const()[name = string("op_1015"), val = tensor([1, 64, 2048])]; + tensor var_1013_cast_fp16 = transpose(perm = var_1013_perm_0, x = reshape_11_cast_fp16)[name = string("transpose_30")]; + tensor input_47_cast_fp16 = reshape(shape = var_1015, x = var_1013_cast_fp16)[name = string("input_47_cast_fp16")]; + tensor model_model_layers_3_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267091008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269188224))))[name = string("model_model_layers_3_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_3_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_3_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_47_cast_fp16)[name = string("linear_3_cast_fp16")]; + tensor hidden_states_29_cast_fp16 = add(x = hidden_states_25_cast_fp16, y = linear_3_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; + tensor mean_15_axes_0 = const()[name = string("mean_15_axes_0"), val = tensor([-1])]; + bool mean_15_keep_dims_0 = const()[name = string("mean_15_keep_dims_0"), val = bool(true)]; + tensor mean_15_cast_fp16 = reduce_mean(axes = mean_15_axes_0, keep_dims = mean_15_keep_dims_0, x = hidden_states_29_cast_fp16)[name = string("mean_15_cast_fp16")]; + tensor input_49_cast_fp16 = sub(x = hidden_states_29_cast_fp16, y = mean_15_cast_fp16)[name = string("input_49_cast_fp16")]; + tensor var_1026_axes_0 = const()[name = string("op_1026_axes_0"), val = tensor([-1])]; + tensor model_model_layers_3_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_3_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269196480)))]; + tensor var_1026_cast_fp16 = layer_norm(axes = var_1026_axes_0, epsilon = var_47_to_fp16, gamma = model_model_layers_3_post_attention_layernorm_weight_to_fp16, x = input_49_cast_fp16)[name = string("op_1026_cast_fp16")]; + tensor var_1033 = const()[name = string("op_1033"), val = tensor([0, 2, 1])]; + tensor input_51_axes_0 = const()[name = string("input_51_axes_0"), val = tensor([2])]; + tensor var_1034 = transpose(perm = var_1033, x = var_1026_cast_fp16)[name = string("transpose_29")]; + tensor input_51 = expand_dims(axes = input_51_axes_0, x = var_1034)[name = string("input_51")]; + string input_53_pad_type_0 = const()[name = string("input_53_pad_type_0"), val = string("valid")]; + tensor input_53_strides_0 = const()[name = string("input_53_strides_0"), val = tensor([1, 1])]; + tensor input_53_pad_0 = const()[name = string("input_53_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_53_dilations_0 = const()[name = string("input_53_dilations_0"), val = tensor([1, 1])]; + int32 input_53_groups_0 = const()[name = string("input_53_groups_0"), val = int32(1)]; + tensor input_53 = conv(dilations = input_53_dilations_0, groups = input_53_groups_0, pad = input_53_pad_0, pad_type = input_53_pad_type_0, strides = input_53_strides_0, weight = model_model_layers_3_mlp_gate_proj_weight_palettized, x = input_51)[name = string("input_53")]; + string up_states_7_pad_type_0 = const()[name = string("up_states_7_pad_type_0"), val = string("valid")]; + tensor up_states_7_strides_0 = const()[name = string("up_states_7_strides_0"), val = tensor([1, 1])]; + tensor up_states_7_pad_0 = const()[name = string("up_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_7_dilations_0 = const()[name = string("up_states_7_dilations_0"), val = tensor([1, 1])]; + int32 up_states_7_groups_0 = const()[name = string("up_states_7_groups_0"), val = int32(1)]; + tensor up_states_7 = conv(dilations = up_states_7_dilations_0, groups = up_states_7_groups_0, pad = up_states_7_pad_0, pad_type = up_states_7_pad_type_0, strides = up_states_7_strides_0, weight = model_model_layers_3_mlp_up_proj_weight_palettized, x = input_51)[name = string("up_states_7")]; + tensor gate_states_7 = silu(x = input_53)[name = string("gate_states_7")]; + tensor input_55 = mul(x = gate_states_7, y = up_states_7)[name = string("input_55")]; + string hidden_states_31_pad_type_0 = const()[name = string("hidden_states_31_pad_type_0"), val = string("valid")]; + tensor hidden_states_31_strides_0 = const()[name = string("hidden_states_31_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_31_pad_0 = const()[name = string("hidden_states_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_31_dilations_0 = const()[name = string("hidden_states_31_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_31_groups_0 = const()[name = string("hidden_states_31_groups_0"), val = int32(1)]; + tensor hidden_states_31 = conv(dilations = hidden_states_31_dilations_0, groups = hidden_states_31_groups_0, pad = hidden_states_31_pad_0, pad_type = hidden_states_31_pad_type_0, strides = hidden_states_31_strides_0, weight = model_model_layers_3_mlp_down_proj_weight_palettized, x = input_55)[name = string("hidden_states_31")]; + tensor var_1056_axes_0 = const()[name = string("op_1056_axes_0"), val = tensor([2])]; + tensor var_1056 = squeeze(axes = var_1056_axes_0, x = hidden_states_31)[name = string("op_1056")]; + tensor var_1057 = const()[name = string("op_1057"), val = tensor([0, 2, 1])]; + tensor var_1058 = transpose(perm = var_1057, x = var_1056)[name = string("transpose_28")]; + tensor hidden_states_33_cast_fp16 = add(x = hidden_states_29_cast_fp16, y = var_1058)[name = string("hidden_states_33_cast_fp16")]; + tensor mean_17_axes_0 = const()[name = string("mean_17_axes_0"), val = tensor([-1])]; + bool mean_17_keep_dims_0 = const()[name = string("mean_17_keep_dims_0"), val = bool(true)]; + tensor mean_17_cast_fp16 = reduce_mean(axes = mean_17_axes_0, keep_dims = mean_17_keep_dims_0, x = hidden_states_33_cast_fp16)[name = string("mean_17_cast_fp16")]; + tensor input_57_cast_fp16 = sub(x = hidden_states_33_cast_fp16, y = mean_17_cast_fp16)[name = string("input_57_cast_fp16")]; + tensor var_1066_axes_0 = const()[name = string("op_1066_axes_0"), val = tensor([-1])]; + tensor model_model_layers_4_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_4_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269200640)))]; + tensor var_1066_cast_fp16 = layer_norm(axes = var_1066_axes_0, epsilon = var_47_to_fp16, gamma = model_model_layers_4_input_layernorm_weight_to_fp16, x = input_57_cast_fp16)[name = string("op_1066_cast_fp16")]; + tensor var_1070 = const()[name = string("op_1070"), val = tensor([0, 2, 1])]; + tensor var_1072_axes_0 = const()[name = string("op_1072_axes_0"), val = tensor([2])]; + tensor var_1071 = transpose(perm = var_1070, x = var_1066_cast_fp16)[name = string("transpose_27")]; + tensor var_1072 = expand_dims(axes = var_1072_axes_0, x = var_1071)[name = string("op_1072")]; + string query_states_17_pad_type_0 = const()[name = string("query_states_17_pad_type_0"), val = string("valid")]; + tensor query_states_17_strides_0 = const()[name = string("query_states_17_strides_0"), val = tensor([1, 1])]; + tensor query_states_17_pad_0 = const()[name = string("query_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_17_dilations_0 = const()[name = string("query_states_17_dilations_0"), val = tensor([1, 1])]; + int32 query_states_17_groups_0 = const()[name = string("query_states_17_groups_0"), val = int32(1)]; + tensor query_states_17 = conv(dilations = query_states_17_dilations_0, groups = query_states_17_groups_0, pad = query_states_17_pad_0, pad_type = query_states_17_pad_type_0, strides = query_states_17_strides_0, weight = model_model_layers_4_self_attn_q_proj_weight_palettized, x = var_1072)[name = string("query_states_17")]; + string key_states_25_pad_type_0 = const()[name = string("key_states_25_pad_type_0"), val = string("valid")]; + tensor key_states_25_strides_0 = const()[name = string("key_states_25_strides_0"), val = tensor([1, 1])]; + tensor key_states_25_pad_0 = const()[name = string("key_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_25_dilations_0 = const()[name = string("key_states_25_dilations_0"), val = tensor([1, 1])]; + int32 key_states_25_groups_0 = const()[name = string("key_states_25_groups_0"), val = int32(1)]; + tensor key_states_25 = conv(dilations = key_states_25_dilations_0, groups = key_states_25_groups_0, pad = key_states_25_pad_0, pad_type = key_states_25_pad_type_0, strides = key_states_25_strides_0, weight = model_model_layers_4_self_attn_k_proj_weight_palettized, x = var_1072)[name = string("key_states_25")]; + string value_states_25_pad_type_0 = const()[name = string("value_states_25_pad_type_0"), val = string("valid")]; + tensor value_states_25_strides_0 = const()[name = string("value_states_25_strides_0"), val = tensor([1, 1])]; + tensor value_states_25_pad_0 = const()[name = string("value_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_25_dilations_0 = const()[name = string("value_states_25_dilations_0"), val = tensor([1, 1])]; + int32 value_states_25_groups_0 = const()[name = string("value_states_25_groups_0"), val = int32(1)]; + tensor value_states_25 = conv(dilations = value_states_25_dilations_0, groups = value_states_25_groups_0, pad = value_states_25_pad_0, pad_type = value_states_25_pad_type_0, strides = value_states_25_strides_0, weight = model_model_layers_4_self_attn_v_proj_weight_palettized, x = var_1072)[name = string("value_states_25")]; + tensor var_1092 = const()[name = string("op_1092"), val = tensor([1, 32, 64, 64])]; + tensor var_1093 = reshape(shape = var_1092, x = query_states_17)[name = string("op_1093")]; + tensor var_1094 = const()[name = string("op_1094"), val = tensor([0, 1, 3, 2])]; + tensor var_1096 = const()[name = string("op_1096"), val = tensor([1, 8, 64, 64])]; + tensor var_1097 = reshape(shape = var_1096, x = key_states_25)[name = string("op_1097")]; + tensor var_1098 = const()[name = string("op_1098"), val = tensor([0, 1, 3, 2])]; + tensor var_1100 = const()[name = string("op_1100"), val = tensor([1, 8, 64, 64])]; + tensor var_1101 = reshape(shape = var_1100, x = value_states_25)[name = string("op_1101")]; + tensor var_1102 = const()[name = string("op_1102"), val = tensor([0, 1, 3, 2])]; + tensor x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor([1, 32, 64, 32])]; + tensor x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_113 = transpose(perm = var_1094, x = var_1093)[name = string("transpose_26")]; + tensor x1_17 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = x_113)[name = string("x1_17")]; + tensor x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor([1, 32, 64, 64])]; + tensor x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_17 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = x_113)[name = string("x2_17")]; + tensor var_1120 = mul(x = x1_17, y = cos_7)[name = string("op_1120")]; + tensor var_1121 = mul(x = x2_17, y = sin_7)[name = string("op_1121")]; + tensor var_1122 = sub(x = var_1120, y = var_1121)[name = string("op_1122")]; + tensor var_1123 = mul(x = x2_17, y = cos_7)[name = string("op_1123")]; + tensor var_1124 = mul(x = x1_17, y = sin_7)[name = string("op_1124")]; + tensor var_1125 = add(x = var_1123, y = var_1124)[name = string("op_1125")]; + bool rotated_17_interleave_0 = const()[name = string("rotated_17_interleave_0"), val = bool(false)]; + tensor rotated_17 = concat(axis = var_45, interleave = rotated_17_interleave_0, values = (var_1122, var_1125))[name = string("rotated_17")]; + tensor x1_19_begin_0 = const()[name = string("x1_19_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_19_end_0 = const()[name = string("x1_19_end_0"), val = tensor([1, 8, 64, 32])]; + tensor x1_19_end_mask_0 = const()[name = string("x1_19_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_117 = transpose(perm = var_1098, x = var_1097)[name = string("transpose_25")]; + tensor x1_19 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = x_117)[name = string("x1_19")]; + tensor x2_19_begin_0 = const()[name = string("x2_19_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_19_end_0 = const()[name = string("x2_19_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x2_19_end_mask_0 = const()[name = string("x2_19_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_19 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = x_117)[name = string("x2_19")]; + tensor var_1141 = mul(x = x1_19, y = cos_7)[name = string("op_1141")]; + tensor var_1142 = mul(x = x2_19, y = sin_7)[name = string("op_1142")]; + tensor var_1143 = sub(x = var_1141, y = var_1142)[name = string("op_1143")]; + tensor var_1144 = mul(x = x2_19, y = cos_7)[name = string("op_1144")]; + tensor var_1145 = mul(x = x1_19, y = sin_7)[name = string("op_1145")]; + tensor var_1146 = add(x = var_1144, y = var_1145)[name = string("op_1146")]; + bool rotated_19_interleave_0 = const()[name = string("rotated_19_interleave_0"), val = bool(false)]; + tensor rotated_19 = concat(axis = var_45, interleave = rotated_19_interleave_0, values = (var_1143, var_1146))[name = string("rotated_19")]; + tensor expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor([4])]; + tensor expand_dims_49 = const()[name = string("expand_dims_49"), val = tensor([0])]; + tensor expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor([0])]; + tensor expand_dims_52 = const()[name = string("expand_dims_52"), val = tensor([5])]; + int32 concat_74_axis_0 = const()[name = string("concat_74_axis_0"), val = int32(0)]; + bool concat_74_interleave_0 = const()[name = string("concat_74_interleave_0"), val = bool(false)]; + tensor concat_74 = concat(axis = concat_74_axis_0, interleave = concat_74_interleave_0, values = (expand_dims_48, expand_dims_49, current_pos, expand_dims_51))[name = string("concat_74")]; + tensor concat_75_values1_0 = const()[name = string("concat_75_values1_0"), val = tensor([0])]; + tensor concat_75_values3_0 = const()[name = string("concat_75_values3_0"), val = tensor([0])]; + int32 concat_75_axis_0 = const()[name = string("concat_75_axis_0"), val = int32(0)]; + bool concat_75_interleave_0 = const()[name = string("concat_75_interleave_0"), val = bool(false)]; + tensor concat_75 = concat(axis = concat_75_axis_0, interleave = concat_75_interleave_0, values = (expand_dims_52, concat_75_values1_0, var_367, concat_75_values3_0))[name = string("concat_75")]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_74, begin_mask = model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0, end = concat_75, end_mask = model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_9_stride_0, update = rotated_19, x = coreml_update_state_23)[name = string("model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_24_write_state")]; + tensor coreml_update_state_24 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_24")]; + tensor expand_dims_54 = const()[name = string("expand_dims_54"), val = tensor([20])]; + tensor expand_dims_55 = const()[name = string("expand_dims_55"), val = tensor([0])]; + tensor expand_dims_57 = const()[name = string("expand_dims_57"), val = tensor([0])]; + tensor expand_dims_58 = const()[name = string("expand_dims_58"), val = tensor([21])]; + int32 concat_78_axis_0 = const()[name = string("concat_78_axis_0"), val = int32(0)]; + bool concat_78_interleave_0 = const()[name = string("concat_78_interleave_0"), val = bool(false)]; + tensor concat_78 = concat(axis = concat_78_axis_0, interleave = concat_78_interleave_0, values = (expand_dims_54, expand_dims_55, current_pos, expand_dims_57))[name = string("concat_78")]; + tensor concat_79_values1_0 = const()[name = string("concat_79_values1_0"), val = tensor([0])]; + tensor concat_79_values3_0 = const()[name = string("concat_79_values3_0"), val = tensor([0])]; + int32 concat_79_axis_0 = const()[name = string("concat_79_axis_0"), val = int32(0)]; + bool concat_79_interleave_0 = const()[name = string("concat_79_interleave_0"), val = bool(false)]; + tensor concat_79 = concat(axis = concat_79_axis_0, interleave = concat_79_interleave_0, values = (expand_dims_58, concat_79_values1_0, var_367, concat_79_values3_0))[name = string("concat_79")]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_27 = transpose(perm = var_1102, x = var_1101)[name = string("transpose_24")]; + tensor model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_78, begin_mask = model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0, end = concat_79, end_mask = model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_10_stride_0, update = value_states_27, x = coreml_update_state_24)[name = string("model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_25_write_state")]; + tensor coreml_update_state_25 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_25")]; + tensor var_1169_begin_0 = const()[name = string("op_1169_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor var_1169_end_0 = const()[name = string("op_1169_end_0"), val = tensor([5, 8, 1546, 64])]; + tensor var_1169_end_mask_0 = const()[name = string("op_1169_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1169_cast_fp16 = slice_by_index(begin = var_1169_begin_0, end = var_1169_end_0, end_mask = var_1169_end_mask_0, x = coreml_update_state_25)[name = string("op_1169_cast_fp16")]; + tensor K_layer_cache_9_axes_0 = const()[name = string("K_layer_cache_9_axes_0"), val = tensor([0])]; + tensor K_layer_cache_9_cast_fp16 = squeeze(axes = K_layer_cache_9_axes_0, x = var_1169_cast_fp16)[name = string("K_layer_cache_9_cast_fp16")]; + tensor var_1171_begin_0 = const()[name = string("op_1171_begin_0"), val = tensor([20, 0, 0, 0])]; + tensor var_1171_end_0 = const()[name = string("op_1171_end_0"), val = tensor([21, 8, 1546, 64])]; + tensor var_1171_end_mask_0 = const()[name = string("op_1171_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1171_cast_fp16 = slice_by_index(begin = var_1171_begin_0, end = var_1171_end_0, end_mask = var_1171_end_mask_0, x = coreml_update_state_25)[name = string("op_1171_cast_fp16")]; + tensor V_layer_cache_9_axes_0 = const()[name = string("V_layer_cache_9_axes_0"), val = tensor([0])]; + tensor V_layer_cache_9_cast_fp16 = squeeze(axes = V_layer_cache_9_axes_0, x = var_1171_cast_fp16)[name = string("V_layer_cache_9_cast_fp16")]; + tensor x_123_axes_0 = const()[name = string("x_123_axes_0"), val = tensor([1])]; + tensor x_123_cast_fp16 = expand_dims(axes = x_123_axes_0, x = K_layer_cache_9_cast_fp16)[name = string("x_123_cast_fp16")]; + tensor var_1180 = const()[name = string("op_1180"), val = tensor([1, 4, 1, 1])]; + tensor x_125_cast_fp16 = tile(reps = var_1180, x = x_123_cast_fp16)[name = string("x_125_cast_fp16")]; + tensor var_1184 = const()[name = string("op_1184"), val = tensor([1, -1, 1546, 64])]; + tensor var_1185_cast_fp16 = reshape(shape = var_1184, x = x_125_cast_fp16)[name = string("op_1185_cast_fp16")]; + tensor x_129_axes_0 = const()[name = string("x_129_axes_0"), val = tensor([1])]; + tensor x_129_cast_fp16 = expand_dims(axes = x_129_axes_0, x = V_layer_cache_9_cast_fp16)[name = string("x_129_cast_fp16")]; + tensor var_1187 = const()[name = string("op_1187"), val = tensor([1, 4, 1, 1])]; + tensor x_131_cast_fp16 = tile(reps = var_1187, x = x_129_cast_fp16)[name = string("x_131_cast_fp16")]; + bool var_1194_transpose_x_0 = const()[name = string("op_1194_transpose_x_0"), val = bool(false)]; + bool var_1194_transpose_y_0 = const()[name = string("op_1194_transpose_y_0"), val = bool(true)]; + tensor var_1194_cast_fp16 = matmul(transpose_x = var_1194_transpose_x_0, transpose_y = var_1194_transpose_y_0, x = rotated_17, y = var_1185_cast_fp16)[name = string("op_1194_cast_fp16")]; + fp16 var_1195_to_fp16 = const()[name = string("op_1195_to_fp16"), val = fp16(0x1p-3)]; + tensor attn_weights_9_cast_fp16 = mul(x = var_1194_cast_fp16, y = var_1195_to_fp16)[name = string("attn_weights_9_cast_fp16")]; + tensor x_133_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = causal_mask)[name = string("x_133_cast_fp16")]; + tensor reduce_max_4_axes_0 = const()[name = string("reduce_max_4_axes_0"), val = tensor([-1])]; + bool reduce_max_4_keep_dims_0 = const()[name = string("reduce_max_4_keep_dims_0"), val = bool(true)]; + tensor reduce_max_4_cast_fp16 = reduce_max(axes = reduce_max_4_axes_0, keep_dims = reduce_max_4_keep_dims_0, x = x_133_cast_fp16)[name = string("reduce_max_4_cast_fp16")]; + tensor x_135_cast_fp16 = sub(x = x_133_cast_fp16, y = reduce_max_4_cast_fp16)[name = string("x_135_cast_fp16")]; + tensor exp_x_9_cast_fp16 = exp(x = x_135_cast_fp16)[name = string("exp_x_9_cast_fp16")]; + tensor var_1206_axes_0 = const()[name = string("op_1206_axes_0"), val = tensor([-1])]; + bool var_1206_keep_dims_0 = const()[name = string("op_1206_keep_dims_0"), val = bool(true)]; + tensor var_1206_cast_fp16 = reduce_sum(axes = var_1206_axes_0, keep_dims = var_1206_keep_dims_0, x = exp_x_9_cast_fp16)[name = string("op_1206_cast_fp16")]; + tensor var_1207_cast_fp16 = real_div(x = exp_x_9_cast_fp16, y = var_1206_cast_fp16)[name = string("op_1207_cast_fp16")]; + tensor concat_84 = const()[name = string("concat_84"), val = tensor([32, 64, 1546])]; + tensor reshape_12_cast_fp16 = reshape(shape = concat_84, x = var_1207_cast_fp16)[name = string("reshape_12_cast_fp16")]; + tensor concat_85 = const()[name = string("concat_85"), val = tensor([32, 1546, 64])]; + tensor reshape_13_cast_fp16 = reshape(shape = concat_85, x = x_131_cast_fp16)[name = string("reshape_13_cast_fp16")]; + bool matmul_4_transpose_x_0 = const()[name = string("matmul_4_transpose_x_0"), val = bool(false)]; + bool matmul_4_transpose_y_0 = const()[name = string("matmul_4_transpose_y_0"), val = bool(false)]; + tensor matmul_4_cast_fp16 = matmul(transpose_x = matmul_4_transpose_x_0, transpose_y = matmul_4_transpose_y_0, x = reshape_12_cast_fp16, y = reshape_13_cast_fp16)[name = string("matmul_4_cast_fp16")]; + tensor concat_89 = const()[name = string("concat_89"), val = tensor([1, 32, 64, 64])]; + tensor reshape_14_cast_fp16 = reshape(shape = concat_89, x = matmul_4_cast_fp16)[name = string("reshape_14_cast_fp16")]; + tensor var_1210_perm_0 = const()[name = string("op_1210_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1212 = const()[name = string("op_1212"), val = tensor([1, 64, 2048])]; + tensor var_1210_cast_fp16 = transpose(perm = var_1210_perm_0, x = reshape_14_cast_fp16)[name = string("transpose_23")]; + tensor input_61_cast_fp16 = reshape(shape = var_1212, x = var_1210_cast_fp16)[name = string("input_61_cast_fp16")]; + tensor model_model_layers_4_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269204800))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271302016))))[name = string("model_model_layers_4_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_4_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_4_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_61_cast_fp16)[name = string("linear_4_cast_fp16")]; + tensor hidden_states_37_cast_fp16 = add(x = hidden_states_33_cast_fp16, y = linear_4_cast_fp16)[name = string("hidden_states_37_cast_fp16")]; + tensor mean_19_axes_0 = const()[name = string("mean_19_axes_0"), val = tensor([-1])]; + bool mean_19_keep_dims_0 = const()[name = string("mean_19_keep_dims_0"), val = bool(true)]; + tensor mean_19_cast_fp16 = reduce_mean(axes = mean_19_axes_0, keep_dims = mean_19_keep_dims_0, x = hidden_states_37_cast_fp16)[name = string("mean_19_cast_fp16")]; + tensor input_63_cast_fp16 = sub(x = hidden_states_37_cast_fp16, y = mean_19_cast_fp16)[name = string("input_63_cast_fp16")]; + tensor var_1223_axes_0 = const()[name = string("op_1223_axes_0"), val = tensor([-1])]; + tensor model_model_layers_4_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_4_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271310272)))]; + tensor var_1223_cast_fp16 = layer_norm(axes = var_1223_axes_0, epsilon = var_47_to_fp16, gamma = model_model_layers_4_post_attention_layernorm_weight_to_fp16, x = input_63_cast_fp16)[name = string("op_1223_cast_fp16")]; + tensor var_1230 = const()[name = string("op_1230"), val = tensor([0, 2, 1])]; + tensor input_65_axes_0 = const()[name = string("input_65_axes_0"), val = tensor([2])]; + tensor var_1231 = transpose(perm = var_1230, x = var_1223_cast_fp16)[name = string("transpose_22")]; + tensor input_65 = expand_dims(axes = input_65_axes_0, x = var_1231)[name = string("input_65")]; + string input_67_pad_type_0 = const()[name = string("input_67_pad_type_0"), val = string("valid")]; + tensor input_67_strides_0 = const()[name = string("input_67_strides_0"), val = tensor([1, 1])]; + tensor input_67_pad_0 = const()[name = string("input_67_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_67_dilations_0 = const()[name = string("input_67_dilations_0"), val = tensor([1, 1])]; + int32 input_67_groups_0 = const()[name = string("input_67_groups_0"), val = int32(1)]; + tensor input_67 = conv(dilations = input_67_dilations_0, groups = input_67_groups_0, pad = input_67_pad_0, pad_type = input_67_pad_type_0, strides = input_67_strides_0, weight = model_model_layers_4_mlp_gate_proj_weight_palettized, x = input_65)[name = string("input_67")]; + string up_states_9_pad_type_0 = const()[name = string("up_states_9_pad_type_0"), val = string("valid")]; + tensor up_states_9_strides_0 = const()[name = string("up_states_9_strides_0"), val = tensor([1, 1])]; + tensor up_states_9_pad_0 = const()[name = string("up_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_9_dilations_0 = const()[name = string("up_states_9_dilations_0"), val = tensor([1, 1])]; + int32 up_states_9_groups_0 = const()[name = string("up_states_9_groups_0"), val = int32(1)]; + tensor up_states_9 = conv(dilations = up_states_9_dilations_0, groups = up_states_9_groups_0, pad = up_states_9_pad_0, pad_type = up_states_9_pad_type_0, strides = up_states_9_strides_0, weight = model_model_layers_4_mlp_up_proj_weight_palettized, x = input_65)[name = string("up_states_9")]; + tensor gate_states_9 = silu(x = input_67)[name = string("gate_states_9")]; + tensor input_69 = mul(x = gate_states_9, y = up_states_9)[name = string("input_69")]; + string hidden_states_39_pad_type_0 = const()[name = string("hidden_states_39_pad_type_0"), val = string("valid")]; + tensor hidden_states_39_strides_0 = const()[name = string("hidden_states_39_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_39_pad_0 = const()[name = string("hidden_states_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_39_dilations_0 = const()[name = string("hidden_states_39_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_39_groups_0 = const()[name = string("hidden_states_39_groups_0"), val = int32(1)]; + tensor hidden_states_39 = conv(dilations = hidden_states_39_dilations_0, groups = hidden_states_39_groups_0, pad = hidden_states_39_pad_0, pad_type = hidden_states_39_pad_type_0, strides = hidden_states_39_strides_0, weight = model_model_layers_4_mlp_down_proj_weight_palettized, x = input_69)[name = string("hidden_states_39")]; + tensor var_1253_axes_0 = const()[name = string("op_1253_axes_0"), val = tensor([2])]; + tensor var_1253 = squeeze(axes = var_1253_axes_0, x = hidden_states_39)[name = string("op_1253")]; + tensor var_1254 = const()[name = string("op_1254"), val = tensor([0, 2, 1])]; + tensor var_1255 = transpose(perm = var_1254, x = var_1253)[name = string("transpose_21")]; + tensor hidden_states_41_cast_fp16 = add(x = hidden_states_37_cast_fp16, y = var_1255)[name = string("hidden_states_41_cast_fp16")]; + tensor mean_21_axes_0 = const()[name = string("mean_21_axes_0"), val = tensor([-1])]; + bool mean_21_keep_dims_0 = const()[name = string("mean_21_keep_dims_0"), val = bool(true)]; + tensor mean_21_cast_fp16 = reduce_mean(axes = mean_21_axes_0, keep_dims = mean_21_keep_dims_0, x = hidden_states_41_cast_fp16)[name = string("mean_21_cast_fp16")]; + tensor input_71_cast_fp16 = sub(x = hidden_states_41_cast_fp16, y = mean_21_cast_fp16)[name = string("input_71_cast_fp16")]; + tensor var_1263_axes_0 = const()[name = string("op_1263_axes_0"), val = tensor([-1])]; + tensor model_model_layers_5_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_5_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271314432)))]; + tensor var_1263_cast_fp16 = layer_norm(axes = var_1263_axes_0, epsilon = var_47_to_fp16, gamma = model_model_layers_5_input_layernorm_weight_to_fp16, x = input_71_cast_fp16)[name = string("op_1263_cast_fp16")]; + tensor var_1267 = const()[name = string("op_1267"), val = tensor([0, 2, 1])]; + tensor var_1269_axes_0 = const()[name = string("op_1269_axes_0"), val = tensor([2])]; + tensor var_1268 = transpose(perm = var_1267, x = var_1263_cast_fp16)[name = string("transpose_20")]; + tensor var_1269 = expand_dims(axes = var_1269_axes_0, x = var_1268)[name = string("op_1269")]; + string query_states_21_pad_type_0 = const()[name = string("query_states_21_pad_type_0"), val = string("valid")]; + tensor query_states_21_strides_0 = const()[name = string("query_states_21_strides_0"), val = tensor([1, 1])]; + tensor query_states_21_pad_0 = const()[name = string("query_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_21_dilations_0 = const()[name = string("query_states_21_dilations_0"), val = tensor([1, 1])]; + int32 query_states_21_groups_0 = const()[name = string("query_states_21_groups_0"), val = int32(1)]; + tensor query_states_21 = conv(dilations = query_states_21_dilations_0, groups = query_states_21_groups_0, pad = query_states_21_pad_0, pad_type = query_states_21_pad_type_0, strides = query_states_21_strides_0, weight = model_model_layers_5_self_attn_q_proj_weight_palettized, x = var_1269)[name = string("query_states_21")]; + string key_states_31_pad_type_0 = const()[name = string("key_states_31_pad_type_0"), val = string("valid")]; + tensor key_states_31_strides_0 = const()[name = string("key_states_31_strides_0"), val = tensor([1, 1])]; + tensor key_states_31_pad_0 = const()[name = string("key_states_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_31_dilations_0 = const()[name = string("key_states_31_dilations_0"), val = tensor([1, 1])]; + int32 key_states_31_groups_0 = const()[name = string("key_states_31_groups_0"), val = int32(1)]; + tensor key_states_31 = conv(dilations = key_states_31_dilations_0, groups = key_states_31_groups_0, pad = key_states_31_pad_0, pad_type = key_states_31_pad_type_0, strides = key_states_31_strides_0, weight = model_model_layers_5_self_attn_k_proj_weight_palettized, x = var_1269)[name = string("key_states_31")]; + string value_states_31_pad_type_0 = const()[name = string("value_states_31_pad_type_0"), val = string("valid")]; + tensor value_states_31_strides_0 = const()[name = string("value_states_31_strides_0"), val = tensor([1, 1])]; + tensor value_states_31_pad_0 = const()[name = string("value_states_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_31_dilations_0 = const()[name = string("value_states_31_dilations_0"), val = tensor([1, 1])]; + int32 value_states_31_groups_0 = const()[name = string("value_states_31_groups_0"), val = int32(1)]; + tensor value_states_31 = conv(dilations = value_states_31_dilations_0, groups = value_states_31_groups_0, pad = value_states_31_pad_0, pad_type = value_states_31_pad_type_0, strides = value_states_31_strides_0, weight = model_model_layers_5_self_attn_v_proj_weight_palettized, x = var_1269)[name = string("value_states_31")]; + tensor var_1289 = const()[name = string("op_1289"), val = tensor([1, 32, 64, 64])]; + tensor var_1290 = reshape(shape = var_1289, x = query_states_21)[name = string("op_1290")]; + tensor var_1291 = const()[name = string("op_1291"), val = tensor([0, 1, 3, 2])]; + tensor var_1293 = const()[name = string("op_1293"), val = tensor([1, 8, 64, 64])]; + tensor var_1294 = reshape(shape = var_1293, x = key_states_31)[name = string("op_1294")]; + tensor var_1295 = const()[name = string("op_1295"), val = tensor([0, 1, 3, 2])]; + tensor var_1297 = const()[name = string("op_1297"), val = tensor([1, 8, 64, 64])]; + tensor var_1298 = reshape(shape = var_1297, x = value_states_31)[name = string("op_1298")]; + tensor var_1299 = const()[name = string("op_1299"), val = tensor([0, 1, 3, 2])]; + tensor x1_21_begin_0 = const()[name = string("x1_21_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_21_end_0 = const()[name = string("x1_21_end_0"), val = tensor([1, 32, 64, 32])]; + tensor x1_21_end_mask_0 = const()[name = string("x1_21_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_141 = transpose(perm = var_1291, x = var_1290)[name = string("transpose_19")]; + tensor x1_21 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = x_141)[name = string("x1_21")]; + tensor x2_21_begin_0 = const()[name = string("x2_21_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_21_end_0 = const()[name = string("x2_21_end_0"), val = tensor([1, 32, 64, 64])]; + tensor x2_21_end_mask_0 = const()[name = string("x2_21_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_21 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = x_141)[name = string("x2_21")]; + tensor var_1317 = mul(x = x1_21, y = cos_7)[name = string("op_1317")]; + tensor var_1318 = mul(x = x2_21, y = sin_7)[name = string("op_1318")]; + tensor var_1319 = sub(x = var_1317, y = var_1318)[name = string("op_1319")]; + tensor var_1320 = mul(x = x2_21, y = cos_7)[name = string("op_1320")]; + tensor var_1321 = mul(x = x1_21, y = sin_7)[name = string("op_1321")]; + tensor var_1322 = add(x = var_1320, y = var_1321)[name = string("op_1322")]; + bool rotated_21_interleave_0 = const()[name = string("rotated_21_interleave_0"), val = bool(false)]; + tensor rotated_21 = concat(axis = var_45, interleave = rotated_21_interleave_0, values = (var_1319, var_1322))[name = string("rotated_21")]; + tensor x1_23_begin_0 = const()[name = string("x1_23_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_23_end_0 = const()[name = string("x1_23_end_0"), val = tensor([1, 8, 64, 32])]; + tensor x1_23_end_mask_0 = const()[name = string("x1_23_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_145 = transpose(perm = var_1295, x = var_1294)[name = string("transpose_18")]; + tensor x1_23 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = x_145)[name = string("x1_23")]; + tensor x2_23_begin_0 = const()[name = string("x2_23_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_23_end_0 = const()[name = string("x2_23_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x2_23_end_mask_0 = const()[name = string("x2_23_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_23 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = x_145)[name = string("x2_23")]; + tensor var_1338 = mul(x = x1_23, y = cos_7)[name = string("op_1338")]; + tensor var_1339 = mul(x = x2_23, y = sin_7)[name = string("op_1339")]; + tensor var_1340 = sub(x = var_1338, y = var_1339)[name = string("op_1340")]; + tensor var_1341 = mul(x = x2_23, y = cos_7)[name = string("op_1341")]; + tensor var_1342 = mul(x = x1_23, y = sin_7)[name = string("op_1342")]; + tensor var_1343 = add(x = var_1341, y = var_1342)[name = string("op_1343")]; + bool rotated_23_interleave_0 = const()[name = string("rotated_23_interleave_0"), val = bool(false)]; + tensor rotated_23 = concat(axis = var_45, interleave = rotated_23_interleave_0, values = (var_1340, var_1343))[name = string("rotated_23")]; + tensor expand_dims_60 = const()[name = string("expand_dims_60"), val = tensor([5])]; + tensor expand_dims_61 = const()[name = string("expand_dims_61"), val = tensor([0])]; + tensor expand_dims_63 = const()[name = string("expand_dims_63"), val = tensor([0])]; + tensor expand_dims_64 = const()[name = string("expand_dims_64"), val = tensor([6])]; + int32 concat_92_axis_0 = const()[name = string("concat_92_axis_0"), val = int32(0)]; + bool concat_92_interleave_0 = const()[name = string("concat_92_interleave_0"), val = bool(false)]; + tensor concat_92 = concat(axis = concat_92_axis_0, interleave = concat_92_interleave_0, values = (expand_dims_60, expand_dims_61, current_pos, expand_dims_63))[name = string("concat_92")]; + tensor concat_93_values1_0 = const()[name = string("concat_93_values1_0"), val = tensor([0])]; + tensor concat_93_values3_0 = const()[name = string("concat_93_values3_0"), val = tensor([0])]; + int32 concat_93_axis_0 = const()[name = string("concat_93_axis_0"), val = int32(0)]; + bool concat_93_interleave_0 = const()[name = string("concat_93_interleave_0"), val = bool(false)]; + tensor concat_93 = concat(axis = concat_93_axis_0, interleave = concat_93_interleave_0, values = (expand_dims_64, concat_93_values1_0, var_367, concat_93_values3_0))[name = string("concat_93")]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_92, begin_mask = model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0, end = concat_93, end_mask = model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_11_stride_0, update = rotated_23, x = coreml_update_state_25)[name = string("model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_26_write_state")]; + tensor coreml_update_state_26 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_26")]; + tensor expand_dims_66 = const()[name = string("expand_dims_66"), val = tensor([21])]; + tensor expand_dims_67 = const()[name = string("expand_dims_67"), val = tensor([0])]; + tensor expand_dims_69 = const()[name = string("expand_dims_69"), val = tensor([0])]; + tensor expand_dims_70 = const()[name = string("expand_dims_70"), val = tensor([22])]; + int32 concat_96_axis_0 = const()[name = string("concat_96_axis_0"), val = int32(0)]; + bool concat_96_interleave_0 = const()[name = string("concat_96_interleave_0"), val = bool(false)]; + tensor concat_96 = concat(axis = concat_96_axis_0, interleave = concat_96_interleave_0, values = (expand_dims_66, expand_dims_67, current_pos, expand_dims_69))[name = string("concat_96")]; + tensor concat_97_values1_0 = const()[name = string("concat_97_values1_0"), val = tensor([0])]; + tensor concat_97_values3_0 = const()[name = string("concat_97_values3_0"), val = tensor([0])]; + int32 concat_97_axis_0 = const()[name = string("concat_97_axis_0"), val = int32(0)]; + bool concat_97_interleave_0 = const()[name = string("concat_97_interleave_0"), val = bool(false)]; + tensor concat_97 = concat(axis = concat_97_axis_0, interleave = concat_97_interleave_0, values = (expand_dims_70, concat_97_values1_0, var_367, concat_97_values3_0))[name = string("concat_97")]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_33 = transpose(perm = var_1299, x = var_1298)[name = string("transpose_17")]; + tensor model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_96, begin_mask = model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0, end = concat_97, end_mask = model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_12_stride_0, update = value_states_33, x = coreml_update_state_26)[name = string("model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_27_write_state")]; + tensor coreml_update_state_27 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_27")]; + tensor var_1366_begin_0 = const()[name = string("op_1366_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor var_1366_end_0 = const()[name = string("op_1366_end_0"), val = tensor([6, 8, 1546, 64])]; + tensor var_1366_end_mask_0 = const()[name = string("op_1366_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1366_cast_fp16 = slice_by_index(begin = var_1366_begin_0, end = var_1366_end_0, end_mask = var_1366_end_mask_0, x = coreml_update_state_27)[name = string("op_1366_cast_fp16")]; + tensor K_layer_cache_11_axes_0 = const()[name = string("K_layer_cache_11_axes_0"), val = tensor([0])]; + tensor K_layer_cache_11_cast_fp16 = squeeze(axes = K_layer_cache_11_axes_0, x = var_1366_cast_fp16)[name = string("K_layer_cache_11_cast_fp16")]; + tensor var_1368_begin_0 = const()[name = string("op_1368_begin_0"), val = tensor([21, 0, 0, 0])]; + tensor var_1368_end_0 = const()[name = string("op_1368_end_0"), val = tensor([22, 8, 1546, 64])]; + tensor var_1368_end_mask_0 = const()[name = string("op_1368_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1368_cast_fp16 = slice_by_index(begin = var_1368_begin_0, end = var_1368_end_0, end_mask = var_1368_end_mask_0, x = coreml_update_state_27)[name = string("op_1368_cast_fp16")]; + tensor V_layer_cache_11_axes_0 = const()[name = string("V_layer_cache_11_axes_0"), val = tensor([0])]; + tensor V_layer_cache_11_cast_fp16 = squeeze(axes = V_layer_cache_11_axes_0, x = var_1368_cast_fp16)[name = string("V_layer_cache_11_cast_fp16")]; + tensor x_151_axes_0 = const()[name = string("x_151_axes_0"), val = tensor([1])]; + tensor x_151_cast_fp16 = expand_dims(axes = x_151_axes_0, x = K_layer_cache_11_cast_fp16)[name = string("x_151_cast_fp16")]; + tensor var_1377 = const()[name = string("op_1377"), val = tensor([1, 4, 1, 1])]; + tensor x_153_cast_fp16 = tile(reps = var_1377, x = x_151_cast_fp16)[name = string("x_153_cast_fp16")]; + tensor var_1381 = const()[name = string("op_1381"), val = tensor([1, -1, 1546, 64])]; + tensor var_1382_cast_fp16 = reshape(shape = var_1381, x = x_153_cast_fp16)[name = string("op_1382_cast_fp16")]; + tensor x_157_axes_0 = const()[name = string("x_157_axes_0"), val = tensor([1])]; + tensor x_157_cast_fp16 = expand_dims(axes = x_157_axes_0, x = V_layer_cache_11_cast_fp16)[name = string("x_157_cast_fp16")]; + tensor var_1384 = const()[name = string("op_1384"), val = tensor([1, 4, 1, 1])]; + tensor x_159_cast_fp16 = tile(reps = var_1384, x = x_157_cast_fp16)[name = string("x_159_cast_fp16")]; + bool var_1391_transpose_x_0 = const()[name = string("op_1391_transpose_x_0"), val = bool(false)]; + bool var_1391_transpose_y_0 = const()[name = string("op_1391_transpose_y_0"), val = bool(true)]; + tensor var_1391_cast_fp16 = matmul(transpose_x = var_1391_transpose_x_0, transpose_y = var_1391_transpose_y_0, x = rotated_21, y = var_1382_cast_fp16)[name = string("op_1391_cast_fp16")]; + fp16 var_1392_to_fp16 = const()[name = string("op_1392_to_fp16"), val = fp16(0x1p-3)]; + tensor attn_weights_11_cast_fp16 = mul(x = var_1391_cast_fp16, y = var_1392_to_fp16)[name = string("attn_weights_11_cast_fp16")]; + tensor x_161_cast_fp16 = add(x = attn_weights_11_cast_fp16, y = causal_mask)[name = string("x_161_cast_fp16")]; + tensor reduce_max_5_axes_0 = const()[name = string("reduce_max_5_axes_0"), val = tensor([-1])]; + bool reduce_max_5_keep_dims_0 = const()[name = string("reduce_max_5_keep_dims_0"), val = bool(true)]; + tensor reduce_max_5_cast_fp16 = reduce_max(axes = reduce_max_5_axes_0, keep_dims = reduce_max_5_keep_dims_0, x = x_161_cast_fp16)[name = string("reduce_max_5_cast_fp16")]; + tensor x_163_cast_fp16 = sub(x = x_161_cast_fp16, y = reduce_max_5_cast_fp16)[name = string("x_163_cast_fp16")]; + tensor exp_x_11_cast_fp16 = exp(x = x_163_cast_fp16)[name = string("exp_x_11_cast_fp16")]; + tensor var_1403_axes_0 = const()[name = string("op_1403_axes_0"), val = tensor([-1])]; + bool var_1403_keep_dims_0 = const()[name = string("op_1403_keep_dims_0"), val = bool(true)]; + tensor var_1403_cast_fp16 = reduce_sum(axes = var_1403_axes_0, keep_dims = var_1403_keep_dims_0, x = exp_x_11_cast_fp16)[name = string("op_1403_cast_fp16")]; + tensor var_1404_cast_fp16 = real_div(x = exp_x_11_cast_fp16, y = var_1403_cast_fp16)[name = string("op_1404_cast_fp16")]; + tensor concat_102 = const()[name = string("concat_102"), val = tensor([32, 64, 1546])]; + tensor reshape_15_cast_fp16 = reshape(shape = concat_102, x = var_1404_cast_fp16)[name = string("reshape_15_cast_fp16")]; + tensor concat_103 = const()[name = string("concat_103"), val = tensor([32, 1546, 64])]; + tensor reshape_16_cast_fp16 = reshape(shape = concat_103, x = x_159_cast_fp16)[name = string("reshape_16_cast_fp16")]; + bool matmul_5_transpose_x_0 = const()[name = string("matmul_5_transpose_x_0"), val = bool(false)]; + bool matmul_5_transpose_y_0 = const()[name = string("matmul_5_transpose_y_0"), val = bool(false)]; + tensor matmul_5_cast_fp16 = matmul(transpose_x = matmul_5_transpose_x_0, transpose_y = matmul_5_transpose_y_0, x = reshape_15_cast_fp16, y = reshape_16_cast_fp16)[name = string("matmul_5_cast_fp16")]; + tensor concat_107 = const()[name = string("concat_107"), val = tensor([1, 32, 64, 64])]; + tensor reshape_17_cast_fp16 = reshape(shape = concat_107, x = matmul_5_cast_fp16)[name = string("reshape_17_cast_fp16")]; + tensor var_1407_perm_0 = const()[name = string("op_1407_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1409 = const()[name = string("op_1409"), val = tensor([1, 64, 2048])]; + tensor var_1407_cast_fp16 = transpose(perm = var_1407_perm_0, x = reshape_17_cast_fp16)[name = string("transpose_16")]; + tensor input_75_cast_fp16 = reshape(shape = var_1409, x = var_1407_cast_fp16)[name = string("input_75_cast_fp16")]; + tensor model_model_layers_5_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271318592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273415808))))[name = string("model_model_layers_5_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_5_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_5_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_75_cast_fp16)[name = string("linear_5_cast_fp16")]; + tensor hidden_states_45_cast_fp16 = add(x = hidden_states_41_cast_fp16, y = linear_5_cast_fp16)[name = string("hidden_states_45_cast_fp16")]; + tensor mean_23_axes_0 = const()[name = string("mean_23_axes_0"), val = tensor([-1])]; + bool mean_23_keep_dims_0 = const()[name = string("mean_23_keep_dims_0"), val = bool(true)]; + tensor mean_23_cast_fp16 = reduce_mean(axes = mean_23_axes_0, keep_dims = mean_23_keep_dims_0, x = hidden_states_45_cast_fp16)[name = string("mean_23_cast_fp16")]; + tensor input_77_cast_fp16 = sub(x = hidden_states_45_cast_fp16, y = mean_23_cast_fp16)[name = string("input_77_cast_fp16")]; + tensor var_1420_axes_0 = const()[name = string("op_1420_axes_0"), val = tensor([-1])]; + tensor model_model_layers_5_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_5_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273424064)))]; + tensor var_1420_cast_fp16 = layer_norm(axes = var_1420_axes_0, epsilon = var_47_to_fp16, gamma = model_model_layers_5_post_attention_layernorm_weight_to_fp16, x = input_77_cast_fp16)[name = string("op_1420_cast_fp16")]; + tensor var_1427 = const()[name = string("op_1427"), val = tensor([0, 2, 1])]; + tensor input_79_axes_0 = const()[name = string("input_79_axes_0"), val = tensor([2])]; + tensor var_1428 = transpose(perm = var_1427, x = var_1420_cast_fp16)[name = string("transpose_15")]; + tensor input_79 = expand_dims(axes = input_79_axes_0, x = var_1428)[name = string("input_79")]; + string input_81_pad_type_0 = const()[name = string("input_81_pad_type_0"), val = string("valid")]; + tensor input_81_strides_0 = const()[name = string("input_81_strides_0"), val = tensor([1, 1])]; + tensor input_81_pad_0 = const()[name = string("input_81_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_81_dilations_0 = const()[name = string("input_81_dilations_0"), val = tensor([1, 1])]; + int32 input_81_groups_0 = const()[name = string("input_81_groups_0"), val = int32(1)]; + tensor input_81 = conv(dilations = input_81_dilations_0, groups = input_81_groups_0, pad = input_81_pad_0, pad_type = input_81_pad_type_0, strides = input_81_strides_0, weight = model_model_layers_5_mlp_gate_proj_weight_palettized, x = input_79)[name = string("input_81")]; + string up_states_11_pad_type_0 = const()[name = string("up_states_11_pad_type_0"), val = string("valid")]; + tensor up_states_11_strides_0 = const()[name = string("up_states_11_strides_0"), val = tensor([1, 1])]; + tensor up_states_11_pad_0 = const()[name = string("up_states_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_11_dilations_0 = const()[name = string("up_states_11_dilations_0"), val = tensor([1, 1])]; + int32 up_states_11_groups_0 = const()[name = string("up_states_11_groups_0"), val = int32(1)]; + tensor up_states_11 = conv(dilations = up_states_11_dilations_0, groups = up_states_11_groups_0, pad = up_states_11_pad_0, pad_type = up_states_11_pad_type_0, strides = up_states_11_strides_0, weight = model_model_layers_5_mlp_up_proj_weight_palettized, x = input_79)[name = string("up_states_11")]; + tensor gate_states_11 = silu(x = input_81)[name = string("gate_states_11")]; + tensor input_83 = mul(x = gate_states_11, y = up_states_11)[name = string("input_83")]; + string hidden_states_47_pad_type_0 = const()[name = string("hidden_states_47_pad_type_0"), val = string("valid")]; + tensor hidden_states_47_strides_0 = const()[name = string("hidden_states_47_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_47_pad_0 = const()[name = string("hidden_states_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_47_dilations_0 = const()[name = string("hidden_states_47_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_47_groups_0 = const()[name = string("hidden_states_47_groups_0"), val = int32(1)]; + tensor hidden_states_47 = conv(dilations = hidden_states_47_dilations_0, groups = hidden_states_47_groups_0, pad = hidden_states_47_pad_0, pad_type = hidden_states_47_pad_type_0, strides = hidden_states_47_strides_0, weight = model_model_layers_5_mlp_down_proj_weight_palettized, x = input_83)[name = string("hidden_states_47")]; + tensor var_1450_axes_0 = const()[name = string("op_1450_axes_0"), val = tensor([2])]; + tensor var_1450 = squeeze(axes = var_1450_axes_0, x = hidden_states_47)[name = string("op_1450")]; + tensor var_1451 = const()[name = string("op_1451"), val = tensor([0, 2, 1])]; + tensor var_1452 = transpose(perm = var_1451, x = var_1450)[name = string("transpose_14")]; + tensor hidden_states_49_cast_fp16 = add(x = hidden_states_45_cast_fp16, y = var_1452)[name = string("hidden_states_49_cast_fp16")]; + tensor mean_25_axes_0 = const()[name = string("mean_25_axes_0"), val = tensor([-1])]; + bool mean_25_keep_dims_0 = const()[name = string("mean_25_keep_dims_0"), val = bool(true)]; + tensor mean_25_cast_fp16 = reduce_mean(axes = mean_25_axes_0, keep_dims = mean_25_keep_dims_0, x = hidden_states_49_cast_fp16)[name = string("mean_25_cast_fp16")]; + tensor input_85_cast_fp16 = sub(x = hidden_states_49_cast_fp16, y = mean_25_cast_fp16)[name = string("input_85_cast_fp16")]; + tensor var_1460_axes_0 = const()[name = string("op_1460_axes_0"), val = tensor([-1])]; + tensor model_model_layers_6_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_6_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273428224)))]; + tensor var_1460_cast_fp16 = layer_norm(axes = var_1460_axes_0, epsilon = var_47_to_fp16, gamma = model_model_layers_6_input_layernorm_weight_to_fp16, x = input_85_cast_fp16)[name = string("op_1460_cast_fp16")]; + tensor var_1464 = const()[name = string("op_1464"), val = tensor([0, 2, 1])]; + tensor var_1466_axes_0 = const()[name = string("op_1466_axes_0"), val = tensor([2])]; + tensor var_1465 = transpose(perm = var_1464, x = var_1460_cast_fp16)[name = string("transpose_13")]; + tensor var_1466 = expand_dims(axes = var_1466_axes_0, x = var_1465)[name = string("op_1466")]; + string query_states_25_pad_type_0 = const()[name = string("query_states_25_pad_type_0"), val = string("valid")]; + tensor query_states_25_strides_0 = const()[name = string("query_states_25_strides_0"), val = tensor([1, 1])]; + tensor query_states_25_pad_0 = const()[name = string("query_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_25_dilations_0 = const()[name = string("query_states_25_dilations_0"), val = tensor([1, 1])]; + int32 query_states_25_groups_0 = const()[name = string("query_states_25_groups_0"), val = int32(1)]; + tensor query_states_25 = conv(dilations = query_states_25_dilations_0, groups = query_states_25_groups_0, pad = query_states_25_pad_0, pad_type = query_states_25_pad_type_0, strides = query_states_25_strides_0, weight = model_model_layers_6_self_attn_q_proj_weight_palettized, x = var_1466)[name = string("query_states_25")]; + string key_states_37_pad_type_0 = const()[name = string("key_states_37_pad_type_0"), val = string("valid")]; + tensor key_states_37_strides_0 = const()[name = string("key_states_37_strides_0"), val = tensor([1, 1])]; + tensor key_states_37_pad_0 = const()[name = string("key_states_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_37_dilations_0 = const()[name = string("key_states_37_dilations_0"), val = tensor([1, 1])]; + int32 key_states_37_groups_0 = const()[name = string("key_states_37_groups_0"), val = int32(1)]; + tensor key_states_37 = conv(dilations = key_states_37_dilations_0, groups = key_states_37_groups_0, pad = key_states_37_pad_0, pad_type = key_states_37_pad_type_0, strides = key_states_37_strides_0, weight = model_model_layers_6_self_attn_k_proj_weight_palettized, x = var_1466)[name = string("key_states_37")]; + string value_states_37_pad_type_0 = const()[name = string("value_states_37_pad_type_0"), val = string("valid")]; + tensor value_states_37_strides_0 = const()[name = string("value_states_37_strides_0"), val = tensor([1, 1])]; + tensor value_states_37_pad_0 = const()[name = string("value_states_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_37_dilations_0 = const()[name = string("value_states_37_dilations_0"), val = tensor([1, 1])]; + int32 value_states_37_groups_0 = const()[name = string("value_states_37_groups_0"), val = int32(1)]; + tensor value_states_37 = conv(dilations = value_states_37_dilations_0, groups = value_states_37_groups_0, pad = value_states_37_pad_0, pad_type = value_states_37_pad_type_0, strides = value_states_37_strides_0, weight = model_model_layers_6_self_attn_v_proj_weight_palettized, x = var_1466)[name = string("value_states_37")]; + tensor var_1486 = const()[name = string("op_1486"), val = tensor([1, 32, 64, 64])]; + tensor var_1487 = reshape(shape = var_1486, x = query_states_25)[name = string("op_1487")]; + tensor var_1488 = const()[name = string("op_1488"), val = tensor([0, 1, 3, 2])]; + tensor var_1490 = const()[name = string("op_1490"), val = tensor([1, 8, 64, 64])]; + tensor var_1491 = reshape(shape = var_1490, x = key_states_37)[name = string("op_1491")]; + tensor var_1492 = const()[name = string("op_1492"), val = tensor([0, 1, 3, 2])]; + tensor var_1494 = const()[name = string("op_1494"), val = tensor([1, 8, 64, 64])]; + tensor var_1495 = reshape(shape = var_1494, x = value_states_37)[name = string("op_1495")]; + tensor var_1496 = const()[name = string("op_1496"), val = tensor([0, 1, 3, 2])]; + tensor x1_25_begin_0 = const()[name = string("x1_25_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_25_end_0 = const()[name = string("x1_25_end_0"), val = tensor([1, 32, 64, 32])]; + tensor x1_25_end_mask_0 = const()[name = string("x1_25_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_169 = transpose(perm = var_1488, x = var_1487)[name = string("transpose_12")]; + tensor x1_25 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = x_169)[name = string("x1_25")]; + tensor x2_25_begin_0 = const()[name = string("x2_25_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_25_end_0 = const()[name = string("x2_25_end_0"), val = tensor([1, 32, 64, 64])]; + tensor x2_25_end_mask_0 = const()[name = string("x2_25_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_25 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = x_169)[name = string("x2_25")]; + tensor var_1514 = mul(x = x1_25, y = cos_7)[name = string("op_1514")]; + tensor var_1515 = mul(x = x2_25, y = sin_7)[name = string("op_1515")]; + tensor var_1516 = sub(x = var_1514, y = var_1515)[name = string("op_1516")]; + tensor var_1517 = mul(x = x2_25, y = cos_7)[name = string("op_1517")]; + tensor var_1518 = mul(x = x1_25, y = sin_7)[name = string("op_1518")]; + tensor var_1519 = add(x = var_1517, y = var_1518)[name = string("op_1519")]; + bool rotated_25_interleave_0 = const()[name = string("rotated_25_interleave_0"), val = bool(false)]; + tensor rotated_25 = concat(axis = var_45, interleave = rotated_25_interleave_0, values = (var_1516, var_1519))[name = string("rotated_25")]; + tensor x1_27_begin_0 = const()[name = string("x1_27_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_27_end_0 = const()[name = string("x1_27_end_0"), val = tensor([1, 8, 64, 32])]; + tensor x1_27_end_mask_0 = const()[name = string("x1_27_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_173 = transpose(perm = var_1492, x = var_1491)[name = string("transpose_11")]; + tensor x1_27 = slice_by_index(begin = x1_27_begin_0, end = x1_27_end_0, end_mask = x1_27_end_mask_0, x = x_173)[name = string("x1_27")]; + tensor x2_27_begin_0 = const()[name = string("x2_27_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_27_end_0 = const()[name = string("x2_27_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x2_27_end_mask_0 = const()[name = string("x2_27_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_27 = slice_by_index(begin = x2_27_begin_0, end = x2_27_end_0, end_mask = x2_27_end_mask_0, x = x_173)[name = string("x2_27")]; + tensor var_1535 = mul(x = x1_27, y = cos_7)[name = string("op_1535")]; + tensor var_1536 = mul(x = x2_27, y = sin_7)[name = string("op_1536")]; + tensor var_1537 = sub(x = var_1535, y = var_1536)[name = string("op_1537")]; + tensor var_1538 = mul(x = x2_27, y = cos_7)[name = string("op_1538")]; + tensor var_1539 = mul(x = x1_27, y = sin_7)[name = string("op_1539")]; + tensor var_1540 = add(x = var_1538, y = var_1539)[name = string("op_1540")]; + bool rotated_27_interleave_0 = const()[name = string("rotated_27_interleave_0"), val = bool(false)]; + tensor rotated_27 = concat(axis = var_45, interleave = rotated_27_interleave_0, values = (var_1537, var_1540))[name = string("rotated_27")]; + tensor expand_dims_72 = const()[name = string("expand_dims_72"), val = tensor([6])]; + tensor expand_dims_73 = const()[name = string("expand_dims_73"), val = tensor([0])]; + tensor expand_dims_75 = const()[name = string("expand_dims_75"), val = tensor([0])]; + tensor expand_dims_76 = const()[name = string("expand_dims_76"), val = tensor([7])]; + int32 concat_110_axis_0 = const()[name = string("concat_110_axis_0"), val = int32(0)]; + bool concat_110_interleave_0 = const()[name = string("concat_110_interleave_0"), val = bool(false)]; + tensor concat_110 = concat(axis = concat_110_axis_0, interleave = concat_110_interleave_0, values = (expand_dims_72, expand_dims_73, current_pos, expand_dims_75))[name = string("concat_110")]; + tensor concat_111_values1_0 = const()[name = string("concat_111_values1_0"), val = tensor([0])]; + tensor concat_111_values3_0 = const()[name = string("concat_111_values3_0"), val = tensor([0])]; + int32 concat_111_axis_0 = const()[name = string("concat_111_axis_0"), val = int32(0)]; + bool concat_111_interleave_0 = const()[name = string("concat_111_interleave_0"), val = bool(false)]; + tensor concat_111 = concat(axis = concat_111_axis_0, interleave = concat_111_interleave_0, values = (expand_dims_76, concat_111_values1_0, var_367, concat_111_values3_0))[name = string("concat_111")]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_110, begin_mask = model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0, end = concat_111, end_mask = model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_13_stride_0, update = rotated_27, x = coreml_update_state_27)[name = string("model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_28_write_state")]; + tensor coreml_update_state_28 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_28")]; + tensor expand_dims_78 = const()[name = string("expand_dims_78"), val = tensor([22])]; + tensor expand_dims_79 = const()[name = string("expand_dims_79"), val = tensor([0])]; + tensor expand_dims_81 = const()[name = string("expand_dims_81"), val = tensor([0])]; + tensor expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor([23])]; + int32 concat_114_axis_0 = const()[name = string("concat_114_axis_0"), val = int32(0)]; + bool concat_114_interleave_0 = const()[name = string("concat_114_interleave_0"), val = bool(false)]; + tensor concat_114 = concat(axis = concat_114_axis_0, interleave = concat_114_interleave_0, values = (expand_dims_78, expand_dims_79, current_pos, expand_dims_81))[name = string("concat_114")]; + tensor concat_115_values1_0 = const()[name = string("concat_115_values1_0"), val = tensor([0])]; + tensor concat_115_values3_0 = const()[name = string("concat_115_values3_0"), val = tensor([0])]; + int32 concat_115_axis_0 = const()[name = string("concat_115_axis_0"), val = int32(0)]; + bool concat_115_interleave_0 = const()[name = string("concat_115_interleave_0"), val = bool(false)]; + tensor concat_115 = concat(axis = concat_115_axis_0, interleave = concat_115_interleave_0, values = (expand_dims_82, concat_115_values1_0, var_367, concat_115_values3_0))[name = string("concat_115")]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_39 = transpose(perm = var_1496, x = var_1495)[name = string("transpose_10")]; + tensor model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_114, begin_mask = model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0, end = concat_115, end_mask = model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_14_stride_0, update = value_states_39, x = coreml_update_state_28)[name = string("model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_29_write_state")]; + tensor coreml_update_state_29 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_29")]; + tensor var_1563_begin_0 = const()[name = string("op_1563_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor var_1563_end_0 = const()[name = string("op_1563_end_0"), val = tensor([7, 8, 1546, 64])]; + tensor var_1563_end_mask_0 = const()[name = string("op_1563_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1563_cast_fp16 = slice_by_index(begin = var_1563_begin_0, end = var_1563_end_0, end_mask = var_1563_end_mask_0, x = coreml_update_state_29)[name = string("op_1563_cast_fp16")]; + tensor K_layer_cache_13_axes_0 = const()[name = string("K_layer_cache_13_axes_0"), val = tensor([0])]; + tensor K_layer_cache_13_cast_fp16 = squeeze(axes = K_layer_cache_13_axes_0, x = var_1563_cast_fp16)[name = string("K_layer_cache_13_cast_fp16")]; + tensor var_1565_begin_0 = const()[name = string("op_1565_begin_0"), val = tensor([22, 0, 0, 0])]; + tensor var_1565_end_0 = const()[name = string("op_1565_end_0"), val = tensor([23, 8, 1546, 64])]; + tensor var_1565_end_mask_0 = const()[name = string("op_1565_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1565_cast_fp16 = slice_by_index(begin = var_1565_begin_0, end = var_1565_end_0, end_mask = var_1565_end_mask_0, x = coreml_update_state_29)[name = string("op_1565_cast_fp16")]; + tensor V_layer_cache_13_axes_0 = const()[name = string("V_layer_cache_13_axes_0"), val = tensor([0])]; + tensor V_layer_cache_13_cast_fp16 = squeeze(axes = V_layer_cache_13_axes_0, x = var_1565_cast_fp16)[name = string("V_layer_cache_13_cast_fp16")]; + tensor x_179_axes_0 = const()[name = string("x_179_axes_0"), val = tensor([1])]; + tensor x_179_cast_fp16 = expand_dims(axes = x_179_axes_0, x = K_layer_cache_13_cast_fp16)[name = string("x_179_cast_fp16")]; + tensor var_1574 = const()[name = string("op_1574"), val = tensor([1, 4, 1, 1])]; + tensor x_181_cast_fp16 = tile(reps = var_1574, x = x_179_cast_fp16)[name = string("x_181_cast_fp16")]; + tensor var_1578 = const()[name = string("op_1578"), val = tensor([1, -1, 1546, 64])]; + tensor var_1579_cast_fp16 = reshape(shape = var_1578, x = x_181_cast_fp16)[name = string("op_1579_cast_fp16")]; + tensor x_185_axes_0 = const()[name = string("x_185_axes_0"), val = tensor([1])]; + tensor x_185_cast_fp16 = expand_dims(axes = x_185_axes_0, x = V_layer_cache_13_cast_fp16)[name = string("x_185_cast_fp16")]; + tensor var_1581 = const()[name = string("op_1581"), val = tensor([1, 4, 1, 1])]; + tensor x_187_cast_fp16 = tile(reps = var_1581, x = x_185_cast_fp16)[name = string("x_187_cast_fp16")]; + bool var_1588_transpose_x_0 = const()[name = string("op_1588_transpose_x_0"), val = bool(false)]; + bool var_1588_transpose_y_0 = const()[name = string("op_1588_transpose_y_0"), val = bool(true)]; + tensor var_1588_cast_fp16 = matmul(transpose_x = var_1588_transpose_x_0, transpose_y = var_1588_transpose_y_0, x = rotated_25, y = var_1579_cast_fp16)[name = string("op_1588_cast_fp16")]; + fp16 var_1589_to_fp16 = const()[name = string("op_1589_to_fp16"), val = fp16(0x1p-3)]; + tensor attn_weights_13_cast_fp16 = mul(x = var_1588_cast_fp16, y = var_1589_to_fp16)[name = string("attn_weights_13_cast_fp16")]; + tensor x_189_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = causal_mask)[name = string("x_189_cast_fp16")]; + tensor reduce_max_6_axes_0 = const()[name = string("reduce_max_6_axes_0"), val = tensor([-1])]; + bool reduce_max_6_keep_dims_0 = const()[name = string("reduce_max_6_keep_dims_0"), val = bool(true)]; + tensor reduce_max_6_cast_fp16 = reduce_max(axes = reduce_max_6_axes_0, keep_dims = reduce_max_6_keep_dims_0, x = x_189_cast_fp16)[name = string("reduce_max_6_cast_fp16")]; + tensor x_191_cast_fp16 = sub(x = x_189_cast_fp16, y = reduce_max_6_cast_fp16)[name = string("x_191_cast_fp16")]; + tensor exp_x_13_cast_fp16 = exp(x = x_191_cast_fp16)[name = string("exp_x_13_cast_fp16")]; + tensor var_1600_axes_0 = const()[name = string("op_1600_axes_0"), val = tensor([-1])]; + bool var_1600_keep_dims_0 = const()[name = string("op_1600_keep_dims_0"), val = bool(true)]; + tensor var_1600_cast_fp16 = reduce_sum(axes = var_1600_axes_0, keep_dims = var_1600_keep_dims_0, x = exp_x_13_cast_fp16)[name = string("op_1600_cast_fp16")]; + tensor var_1601_cast_fp16 = real_div(x = exp_x_13_cast_fp16, y = var_1600_cast_fp16)[name = string("op_1601_cast_fp16")]; + tensor concat_120 = const()[name = string("concat_120"), val = tensor([32, 64, 1546])]; + tensor reshape_18_cast_fp16 = reshape(shape = concat_120, x = var_1601_cast_fp16)[name = string("reshape_18_cast_fp16")]; + tensor concat_121 = const()[name = string("concat_121"), val = tensor([32, 1546, 64])]; + tensor reshape_19_cast_fp16 = reshape(shape = concat_121, x = x_187_cast_fp16)[name = string("reshape_19_cast_fp16")]; + bool matmul_6_transpose_x_0 = const()[name = string("matmul_6_transpose_x_0"), val = bool(false)]; + bool matmul_6_transpose_y_0 = const()[name = string("matmul_6_transpose_y_0"), val = bool(false)]; + tensor matmul_6_cast_fp16 = matmul(transpose_x = matmul_6_transpose_x_0, transpose_y = matmul_6_transpose_y_0, x = reshape_18_cast_fp16, y = reshape_19_cast_fp16)[name = string("matmul_6_cast_fp16")]; + tensor concat_125 = const()[name = string("concat_125"), val = tensor([1, 32, 64, 64])]; + tensor reshape_20_cast_fp16 = reshape(shape = concat_125, x = matmul_6_cast_fp16)[name = string("reshape_20_cast_fp16")]; + tensor var_1604_perm_0 = const()[name = string("op_1604_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1606 = const()[name = string("op_1606"), val = tensor([1, 64, 2048])]; + tensor var_1604_cast_fp16 = transpose(perm = var_1604_perm_0, x = reshape_20_cast_fp16)[name = string("transpose_9")]; + tensor input_89_cast_fp16 = reshape(shape = var_1606, x = var_1604_cast_fp16)[name = string("input_89_cast_fp16")]; + tensor model_model_layers_6_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273432384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275529600))))[name = string("model_model_layers_6_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_6_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_6_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_89_cast_fp16)[name = string("linear_6_cast_fp16")]; + tensor hidden_states_53_cast_fp16 = add(x = hidden_states_49_cast_fp16, y = linear_6_cast_fp16)[name = string("hidden_states_53_cast_fp16")]; + tensor mean_27_axes_0 = const()[name = string("mean_27_axes_0"), val = tensor([-1])]; + bool mean_27_keep_dims_0 = const()[name = string("mean_27_keep_dims_0"), val = bool(true)]; + tensor mean_27_cast_fp16 = reduce_mean(axes = mean_27_axes_0, keep_dims = mean_27_keep_dims_0, x = hidden_states_53_cast_fp16)[name = string("mean_27_cast_fp16")]; + tensor input_91_cast_fp16 = sub(x = hidden_states_53_cast_fp16, y = mean_27_cast_fp16)[name = string("input_91_cast_fp16")]; + tensor var_1617_axes_0 = const()[name = string("op_1617_axes_0"), val = tensor([-1])]; + tensor model_model_layers_6_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_6_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275537856)))]; + tensor var_1617_cast_fp16 = layer_norm(axes = var_1617_axes_0, epsilon = var_47_to_fp16, gamma = model_model_layers_6_post_attention_layernorm_weight_to_fp16, x = input_91_cast_fp16)[name = string("op_1617_cast_fp16")]; + tensor var_1624 = const()[name = string("op_1624"), val = tensor([0, 2, 1])]; + tensor input_93_axes_0 = const()[name = string("input_93_axes_0"), val = tensor([2])]; + tensor var_1625 = transpose(perm = var_1624, x = var_1617_cast_fp16)[name = string("transpose_8")]; + tensor input_93 = expand_dims(axes = input_93_axes_0, x = var_1625)[name = string("input_93")]; + string input_95_pad_type_0 = const()[name = string("input_95_pad_type_0"), val = string("valid")]; + tensor input_95_strides_0 = const()[name = string("input_95_strides_0"), val = tensor([1, 1])]; + tensor input_95_pad_0 = const()[name = string("input_95_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_95_dilations_0 = const()[name = string("input_95_dilations_0"), val = tensor([1, 1])]; + int32 input_95_groups_0 = const()[name = string("input_95_groups_0"), val = int32(1)]; + tensor input_95 = conv(dilations = input_95_dilations_0, groups = input_95_groups_0, pad = input_95_pad_0, pad_type = input_95_pad_type_0, strides = input_95_strides_0, weight = model_model_layers_6_mlp_gate_proj_weight_palettized, x = input_93)[name = string("input_95")]; + string up_states_13_pad_type_0 = const()[name = string("up_states_13_pad_type_0"), val = string("valid")]; + tensor up_states_13_strides_0 = const()[name = string("up_states_13_strides_0"), val = tensor([1, 1])]; + tensor up_states_13_pad_0 = const()[name = string("up_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_13_dilations_0 = const()[name = string("up_states_13_dilations_0"), val = tensor([1, 1])]; + int32 up_states_13_groups_0 = const()[name = string("up_states_13_groups_0"), val = int32(1)]; + tensor up_states_13 = conv(dilations = up_states_13_dilations_0, groups = up_states_13_groups_0, pad = up_states_13_pad_0, pad_type = up_states_13_pad_type_0, strides = up_states_13_strides_0, weight = model_model_layers_6_mlp_up_proj_weight_palettized, x = input_93)[name = string("up_states_13")]; + tensor gate_states_13 = silu(x = input_95)[name = string("gate_states_13")]; + tensor input_97 = mul(x = gate_states_13, y = up_states_13)[name = string("input_97")]; + string hidden_states_55_pad_type_0 = const()[name = string("hidden_states_55_pad_type_0"), val = string("valid")]; + tensor hidden_states_55_strides_0 = const()[name = string("hidden_states_55_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_55_pad_0 = const()[name = string("hidden_states_55_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_55_dilations_0 = const()[name = string("hidden_states_55_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_55_groups_0 = const()[name = string("hidden_states_55_groups_0"), val = int32(1)]; + tensor hidden_states_55 = conv(dilations = hidden_states_55_dilations_0, groups = hidden_states_55_groups_0, pad = hidden_states_55_pad_0, pad_type = hidden_states_55_pad_type_0, strides = hidden_states_55_strides_0, weight = model_model_layers_6_mlp_down_proj_weight_palettized, x = input_97)[name = string("hidden_states_55")]; + tensor var_1647_axes_0 = const()[name = string("op_1647_axes_0"), val = tensor([2])]; + tensor var_1647 = squeeze(axes = var_1647_axes_0, x = hidden_states_55)[name = string("op_1647")]; + tensor var_1648 = const()[name = string("op_1648"), val = tensor([0, 2, 1])]; + tensor var_1649 = transpose(perm = var_1648, x = var_1647)[name = string("transpose_7")]; + tensor hidden_states_57_cast_fp16 = add(x = hidden_states_53_cast_fp16, y = var_1649)[name = string("hidden_states_57_cast_fp16")]; + tensor mean_29_axes_0 = const()[name = string("mean_29_axes_0"), val = tensor([-1])]; + bool mean_29_keep_dims_0 = const()[name = string("mean_29_keep_dims_0"), val = bool(true)]; + tensor mean_29_cast_fp16 = reduce_mean(axes = mean_29_axes_0, keep_dims = mean_29_keep_dims_0, x = hidden_states_57_cast_fp16)[name = string("mean_29_cast_fp16")]; + tensor input_99_cast_fp16 = sub(x = hidden_states_57_cast_fp16, y = mean_29_cast_fp16)[name = string("input_99_cast_fp16")]; + tensor var_1657_axes_0 = const()[name = string("op_1657_axes_0"), val = tensor([-1])]; + tensor model_model_layers_7_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_7_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275542016)))]; + tensor var_1657_cast_fp16 = layer_norm(axes = var_1657_axes_0, epsilon = var_47_to_fp16, gamma = model_model_layers_7_input_layernorm_weight_to_fp16, x = input_99_cast_fp16)[name = string("op_1657_cast_fp16")]; + tensor var_1661 = const()[name = string("op_1661"), val = tensor([0, 2, 1])]; + tensor var_1663_axes_0 = const()[name = string("op_1663_axes_0"), val = tensor([2])]; + tensor var_1662 = transpose(perm = var_1661, x = var_1657_cast_fp16)[name = string("transpose_6")]; + tensor var_1663 = expand_dims(axes = var_1663_axes_0, x = var_1662)[name = string("op_1663")]; + string query_states_29_pad_type_0 = const()[name = string("query_states_29_pad_type_0"), val = string("valid")]; + tensor query_states_29_strides_0 = const()[name = string("query_states_29_strides_0"), val = tensor([1, 1])]; + tensor query_states_29_pad_0 = const()[name = string("query_states_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor query_states_29_dilations_0 = const()[name = string("query_states_29_dilations_0"), val = tensor([1, 1])]; + int32 query_states_29_groups_0 = const()[name = string("query_states_29_groups_0"), val = int32(1)]; + tensor query_states_29 = conv(dilations = query_states_29_dilations_0, groups = query_states_29_groups_0, pad = query_states_29_pad_0, pad_type = query_states_29_pad_type_0, strides = query_states_29_strides_0, weight = model_model_layers_7_self_attn_q_proj_weight_palettized, x = var_1663)[name = string("query_states_29")]; + string key_states_43_pad_type_0 = const()[name = string("key_states_43_pad_type_0"), val = string("valid")]; + tensor key_states_43_strides_0 = const()[name = string("key_states_43_strides_0"), val = tensor([1, 1])]; + tensor key_states_43_pad_0 = const()[name = string("key_states_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor key_states_43_dilations_0 = const()[name = string("key_states_43_dilations_0"), val = tensor([1, 1])]; + int32 key_states_43_groups_0 = const()[name = string("key_states_43_groups_0"), val = int32(1)]; + tensor key_states_43 = conv(dilations = key_states_43_dilations_0, groups = key_states_43_groups_0, pad = key_states_43_pad_0, pad_type = key_states_43_pad_type_0, strides = key_states_43_strides_0, weight = model_model_layers_7_self_attn_k_proj_weight_palettized, x = var_1663)[name = string("key_states_43")]; + string value_states_43_pad_type_0 = const()[name = string("value_states_43_pad_type_0"), val = string("valid")]; + tensor value_states_43_strides_0 = const()[name = string("value_states_43_strides_0"), val = tensor([1, 1])]; + tensor value_states_43_pad_0 = const()[name = string("value_states_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor value_states_43_dilations_0 = const()[name = string("value_states_43_dilations_0"), val = tensor([1, 1])]; + int32 value_states_43_groups_0 = const()[name = string("value_states_43_groups_0"), val = int32(1)]; + tensor value_states_43 = conv(dilations = value_states_43_dilations_0, groups = value_states_43_groups_0, pad = value_states_43_pad_0, pad_type = value_states_43_pad_type_0, strides = value_states_43_strides_0, weight = model_model_layers_7_self_attn_v_proj_weight_palettized, x = var_1663)[name = string("value_states_43")]; + tensor var_1683 = const()[name = string("op_1683"), val = tensor([1, 32, 64, 64])]; + tensor var_1684 = reshape(shape = var_1683, x = query_states_29)[name = string("op_1684")]; + tensor var_1685 = const()[name = string("op_1685"), val = tensor([0, 1, 3, 2])]; + tensor var_1687 = const()[name = string("op_1687"), val = tensor([1, 8, 64, 64])]; + tensor var_1688 = reshape(shape = var_1687, x = key_states_43)[name = string("op_1688")]; + tensor var_1689 = const()[name = string("op_1689"), val = tensor([0, 1, 3, 2])]; + tensor var_1691 = const()[name = string("op_1691"), val = tensor([1, 8, 64, 64])]; + tensor var_1692 = reshape(shape = var_1691, x = value_states_43)[name = string("op_1692")]; + tensor var_1693 = const()[name = string("op_1693"), val = tensor([0, 1, 3, 2])]; + tensor x1_29_begin_0 = const()[name = string("x1_29_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_29_end_0 = const()[name = string("x1_29_end_0"), val = tensor([1, 32, 64, 32])]; + tensor x1_29_end_mask_0 = const()[name = string("x1_29_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_197 = transpose(perm = var_1685, x = var_1684)[name = string("transpose_5")]; + tensor x1_29 = slice_by_index(begin = x1_29_begin_0, end = x1_29_end_0, end_mask = x1_29_end_mask_0, x = x_197)[name = string("x1_29")]; + tensor x2_29_begin_0 = const()[name = string("x2_29_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_29_end_0 = const()[name = string("x2_29_end_0"), val = tensor([1, 32, 64, 64])]; + tensor x2_29_end_mask_0 = const()[name = string("x2_29_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_29 = slice_by_index(begin = x2_29_begin_0, end = x2_29_end_0, end_mask = x2_29_end_mask_0, x = x_197)[name = string("x2_29")]; + tensor var_1711 = mul(x = x1_29, y = cos_7)[name = string("op_1711")]; + tensor var_1712 = mul(x = x2_29, y = sin_7)[name = string("op_1712")]; + tensor var_1713 = sub(x = var_1711, y = var_1712)[name = string("op_1713")]; + tensor var_1714 = mul(x = x2_29, y = cos_7)[name = string("op_1714")]; + tensor var_1715 = mul(x = x1_29, y = sin_7)[name = string("op_1715")]; + tensor var_1716 = add(x = var_1714, y = var_1715)[name = string("op_1716")]; + bool rotated_29_interleave_0 = const()[name = string("rotated_29_interleave_0"), val = bool(false)]; + tensor rotated_29 = concat(axis = var_45, interleave = rotated_29_interleave_0, values = (var_1713, var_1716))[name = string("rotated_29")]; + tensor x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_end_0 = const()[name = string("x1_end_0"), val = tensor([1, 8, 64, 32])]; + tensor x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x_201 = transpose(perm = var_1689, x = var_1688)[name = string("transpose_4")]; + tensor x1 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = x_201)[name = string("x1")]; + tensor x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_end_0 = const()[name = string("x2_end_0"), val = tensor([1, 8, 64, 64])]; + tensor x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = x_201)[name = string("x2")]; + tensor var_1732 = mul(x = x1, y = cos_7)[name = string("op_1732")]; + tensor var_1733 = mul(x = x2, y = sin_7)[name = string("op_1733")]; + tensor var_1734 = sub(x = var_1732, y = var_1733)[name = string("op_1734")]; + tensor var_1735 = mul(x = x2, y = cos_7)[name = string("op_1735")]; + tensor var_1736 = mul(x = x1, y = sin_7)[name = string("op_1736")]; + tensor var_1737 = add(x = var_1735, y = var_1736)[name = string("op_1737")]; + bool rotated_interleave_0 = const()[name = string("rotated_interleave_0"), val = bool(false)]; + tensor rotated = concat(axis = var_45, interleave = rotated_interleave_0, values = (var_1734, var_1737))[name = string("rotated")]; + tensor expand_dims_84 = const()[name = string("expand_dims_84"), val = tensor([7])]; + tensor expand_dims_85 = const()[name = string("expand_dims_85"), val = tensor([0])]; + tensor expand_dims_87 = const()[name = string("expand_dims_87"), val = tensor([0])]; + tensor expand_dims_88 = const()[name = string("expand_dims_88"), val = tensor([8])]; + int32 concat_128_axis_0 = const()[name = string("concat_128_axis_0"), val = int32(0)]; + bool concat_128_interleave_0 = const()[name = string("concat_128_interleave_0"), val = bool(false)]; + tensor concat_128 = concat(axis = concat_128_axis_0, interleave = concat_128_interleave_0, values = (expand_dims_84, expand_dims_85, current_pos, expand_dims_87))[name = string("concat_128")]; + tensor concat_129_values1_0 = const()[name = string("concat_129_values1_0"), val = tensor([0])]; + tensor concat_129_values3_0 = const()[name = string("concat_129_values3_0"), val = tensor([0])]; + int32 concat_129_axis_0 = const()[name = string("concat_129_axis_0"), val = int32(0)]; + bool concat_129_interleave_0 = const()[name = string("concat_129_interleave_0"), val = bool(false)]; + tensor concat_129 = concat(axis = concat_129_axis_0, interleave = concat_129_interleave_0, values = (expand_dims_88, concat_129_values1_0, var_367, concat_129_values3_0))[name = string("concat_129")]; + tensor model_model_kv_cache_0_internal_tensor_assign_15_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_128, begin_mask = model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0, end = concat_129, end_mask = model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_15_stride_0, update = rotated, x = coreml_update_state_29)[name = string("model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_30_write_state")]; + tensor coreml_update_state_30 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_30")]; + tensor expand_dims_90 = const()[name = string("expand_dims_90"), val = tensor([23])]; + tensor expand_dims_91 = const()[name = string("expand_dims_91"), val = tensor([0])]; + tensor expand_dims_93 = const()[name = string("expand_dims_93"), val = tensor([0])]; + tensor expand_dims_94 = const()[name = string("expand_dims_94"), val = tensor([24])]; + int32 concat_132_axis_0 = const()[name = string("concat_132_axis_0"), val = int32(0)]; + bool concat_132_interleave_0 = const()[name = string("concat_132_interleave_0"), val = bool(false)]; + tensor concat_132 = concat(axis = concat_132_axis_0, interleave = concat_132_interleave_0, values = (expand_dims_90, expand_dims_91, current_pos, expand_dims_93))[name = string("concat_132")]; + tensor concat_133_values1_0 = const()[name = string("concat_133_values1_0"), val = tensor([0])]; + tensor concat_133_values3_0 = const()[name = string("concat_133_values3_0"), val = tensor([0])]; + int32 concat_133_axis_0 = const()[name = string("concat_133_axis_0"), val = int32(0)]; + bool concat_133_interleave_0 = const()[name = string("concat_133_interleave_0"), val = bool(false)]; + tensor concat_133 = concat(axis = concat_133_axis_0, interleave = concat_133_interleave_0, values = (expand_dims_94, concat_133_values1_0, var_367, concat_133_values3_0))[name = string("concat_133")]; + tensor model_model_kv_cache_0_internal_tensor_assign_16_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; + tensor model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor value_states_45 = transpose(perm = var_1693, x = var_1692)[name = string("transpose_3")]; + tensor model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_132, begin_mask = model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0, end = concat_133, end_mask = model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_16_stride_0, update = value_states_45, x = coreml_update_state_30)[name = string("model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16")]; + write_state(data = model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_31_write_state")]; + tensor coreml_update_state_31 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_31")]; + tensor var_1760_begin_0 = const()[name = string("op_1760_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor var_1760_end_0 = const()[name = string("op_1760_end_0"), val = tensor([8, 8, 1546, 64])]; + tensor var_1760_end_mask_0 = const()[name = string("op_1760_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1760_cast_fp16 = slice_by_index(begin = var_1760_begin_0, end = var_1760_end_0, end_mask = var_1760_end_mask_0, x = coreml_update_state_31)[name = string("op_1760_cast_fp16")]; + tensor K_layer_cache_axes_0 = const()[name = string("K_layer_cache_axes_0"), val = tensor([0])]; + tensor K_layer_cache_cast_fp16 = squeeze(axes = K_layer_cache_axes_0, x = var_1760_cast_fp16)[name = string("K_layer_cache_cast_fp16")]; + tensor var_1762_begin_0 = const()[name = string("op_1762_begin_0"), val = tensor([23, 0, 0, 0])]; + tensor var_1762_end_0 = const()[name = string("op_1762_end_0"), val = tensor([24, 8, 1546, 64])]; + tensor var_1762_end_mask_0 = const()[name = string("op_1762_end_mask_0"), val = tensor([false, true, true, true])]; + tensor var_1762_cast_fp16 = slice_by_index(begin = var_1762_begin_0, end = var_1762_end_0, end_mask = var_1762_end_mask_0, x = coreml_update_state_31)[name = string("op_1762_cast_fp16")]; + tensor V_layer_cache_axes_0 = const()[name = string("V_layer_cache_axes_0"), val = tensor([0])]; + tensor V_layer_cache_cast_fp16 = squeeze(axes = V_layer_cache_axes_0, x = var_1762_cast_fp16)[name = string("V_layer_cache_cast_fp16")]; + tensor x_207_axes_0 = const()[name = string("x_207_axes_0"), val = tensor([1])]; + tensor x_207_cast_fp16 = expand_dims(axes = x_207_axes_0, x = K_layer_cache_cast_fp16)[name = string("x_207_cast_fp16")]; + tensor var_1771 = const()[name = string("op_1771"), val = tensor([1, 4, 1, 1])]; + tensor x_209_cast_fp16 = tile(reps = var_1771, x = x_207_cast_fp16)[name = string("x_209_cast_fp16")]; + tensor var_1775 = const()[name = string("op_1775"), val = tensor([1, -1, 1546, 64])]; + tensor var_1776_cast_fp16 = reshape(shape = var_1775, x = x_209_cast_fp16)[name = string("op_1776_cast_fp16")]; + tensor x_213_axes_0 = const()[name = string("x_213_axes_0"), val = tensor([1])]; + tensor x_213_cast_fp16 = expand_dims(axes = x_213_axes_0, x = V_layer_cache_cast_fp16)[name = string("x_213_cast_fp16")]; + tensor var_1778 = const()[name = string("op_1778"), val = tensor([1, 4, 1, 1])]; + tensor x_215_cast_fp16 = tile(reps = var_1778, x = x_213_cast_fp16)[name = string("x_215_cast_fp16")]; + bool var_1785_transpose_x_0 = const()[name = string("op_1785_transpose_x_0"), val = bool(false)]; + bool var_1785_transpose_y_0 = const()[name = string("op_1785_transpose_y_0"), val = bool(true)]; + tensor var_1785_cast_fp16 = matmul(transpose_x = var_1785_transpose_x_0, transpose_y = var_1785_transpose_y_0, x = rotated_29, y = var_1776_cast_fp16)[name = string("op_1785_cast_fp16")]; + fp16 var_1786_to_fp16 = const()[name = string("op_1786_to_fp16"), val = fp16(0x1p-3)]; + tensor attn_weights_cast_fp16 = mul(x = var_1785_cast_fp16, y = var_1786_to_fp16)[name = string("attn_weights_cast_fp16")]; + tensor x_217_cast_fp16 = add(x = attn_weights_cast_fp16, y = causal_mask)[name = string("x_217_cast_fp16")]; + tensor reduce_max_7_axes_0 = const()[name = string("reduce_max_7_axes_0"), val = tensor([-1])]; + bool reduce_max_7_keep_dims_0 = const()[name = string("reduce_max_7_keep_dims_0"), val = bool(true)]; + tensor reduce_max_7_cast_fp16 = reduce_max(axes = reduce_max_7_axes_0, keep_dims = reduce_max_7_keep_dims_0, x = x_217_cast_fp16)[name = string("reduce_max_7_cast_fp16")]; + tensor x_219_cast_fp16 = sub(x = x_217_cast_fp16, y = reduce_max_7_cast_fp16)[name = string("x_219_cast_fp16")]; + tensor exp_x_cast_fp16 = exp(x = x_219_cast_fp16)[name = string("exp_x_cast_fp16")]; + tensor var_1797_axes_0 = const()[name = string("op_1797_axes_0"), val = tensor([-1])]; + bool var_1797_keep_dims_0 = const()[name = string("op_1797_keep_dims_0"), val = bool(true)]; + tensor var_1797_cast_fp16 = reduce_sum(axes = var_1797_axes_0, keep_dims = var_1797_keep_dims_0, x = exp_x_cast_fp16)[name = string("op_1797_cast_fp16")]; + tensor var_1798_cast_fp16 = real_div(x = exp_x_cast_fp16, y = var_1797_cast_fp16)[name = string("op_1798_cast_fp16")]; + tensor concat_138 = const()[name = string("concat_138"), val = tensor([32, 64, 1546])]; + tensor reshape_21_cast_fp16 = reshape(shape = concat_138, x = var_1798_cast_fp16)[name = string("reshape_21_cast_fp16")]; + tensor concat_139 = const()[name = string("concat_139"), val = tensor([32, 1546, 64])]; + tensor reshape_22_cast_fp16 = reshape(shape = concat_139, x = x_215_cast_fp16)[name = string("reshape_22_cast_fp16")]; + bool matmul_7_transpose_x_0 = const()[name = string("matmul_7_transpose_x_0"), val = bool(false)]; + bool matmul_7_transpose_y_0 = const()[name = string("matmul_7_transpose_y_0"), val = bool(false)]; + tensor matmul_7_cast_fp16 = matmul(transpose_x = matmul_7_transpose_x_0, transpose_y = matmul_7_transpose_y_0, x = reshape_21_cast_fp16, y = reshape_22_cast_fp16)[name = string("matmul_7_cast_fp16")]; + tensor concat_143 = const()[name = string("concat_143"), val = tensor([1, 32, 64, 64])]; + tensor reshape_23_cast_fp16 = reshape(shape = concat_143, x = matmul_7_cast_fp16)[name = string("reshape_23_cast_fp16")]; + tensor var_1801_perm_0 = const()[name = string("op_1801_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor var_1803 = const()[name = string("op_1803"), val = tensor([1, 64, 2048])]; + tensor var_1801_cast_fp16 = transpose(perm = var_1801_perm_0, x = reshape_23_cast_fp16)[name = string("transpose_2")]; + tensor input_103_cast_fp16 = reshape(shape = var_1803, x = var_1801_cast_fp16)[name = string("input_103_cast_fp16")]; + tensor model_model_layers_7_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275546176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(277643392))))[name = string("model_model_layers_7_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; + tensor linear_7_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_7_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_103_cast_fp16)[name = string("linear_7_cast_fp16")]; + tensor hidden_states_61_cast_fp16 = add(x = hidden_states_57_cast_fp16, y = linear_7_cast_fp16)[name = string("hidden_states_61_cast_fp16")]; + tensor mean_axes_0 = const()[name = string("mean_axes_0"), val = tensor([-1])]; + bool mean_keep_dims_0 = const()[name = string("mean_keep_dims_0"), val = bool(true)]; + tensor mean_cast_fp16 = reduce_mean(axes = mean_axes_0, keep_dims = mean_keep_dims_0, x = hidden_states_61_cast_fp16)[name = string("mean_cast_fp16")]; + tensor input_105_cast_fp16 = sub(x = hidden_states_61_cast_fp16, y = mean_cast_fp16)[name = string("input_105_cast_fp16")]; + tensor var_1814_axes_0 = const()[name = string("op_1814_axes_0"), val = tensor([-1])]; + tensor model_model_layers_7_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_7_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(277651648)))]; + tensor var_1814_cast_fp16 = layer_norm(axes = var_1814_axes_0, epsilon = var_47_to_fp16, gamma = model_model_layers_7_post_attention_layernorm_weight_to_fp16, x = input_105_cast_fp16)[name = string("op_1814_cast_fp16")]; + tensor var_1821 = const()[name = string("op_1821"), val = tensor([0, 2, 1])]; + tensor input_107_axes_0 = const()[name = string("input_107_axes_0"), val = tensor([2])]; + tensor var_1822 = transpose(perm = var_1821, x = var_1814_cast_fp16)[name = string("transpose_1")]; + tensor input_107 = expand_dims(axes = input_107_axes_0, x = var_1822)[name = string("input_107")]; + string input_109_pad_type_0 = const()[name = string("input_109_pad_type_0"), val = string("valid")]; + tensor input_109_strides_0 = const()[name = string("input_109_strides_0"), val = tensor([1, 1])]; + tensor input_109_pad_0 = const()[name = string("input_109_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor input_109_dilations_0 = const()[name = string("input_109_dilations_0"), val = tensor([1, 1])]; + int32 input_109_groups_0 = const()[name = string("input_109_groups_0"), val = int32(1)]; + tensor input_109 = conv(dilations = input_109_dilations_0, groups = input_109_groups_0, pad = input_109_pad_0, pad_type = input_109_pad_type_0, strides = input_109_strides_0, weight = model_model_layers_7_mlp_gate_proj_weight_palettized, x = input_107)[name = string("input_109")]; + string up_states_pad_type_0 = const()[name = string("up_states_pad_type_0"), val = string("valid")]; + tensor up_states_strides_0 = const()[name = string("up_states_strides_0"), val = tensor([1, 1])]; + tensor up_states_pad_0 = const()[name = string("up_states_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor up_states_dilations_0 = const()[name = string("up_states_dilations_0"), val = tensor([1, 1])]; + int32 up_states_groups_0 = const()[name = string("up_states_groups_0"), val = int32(1)]; + tensor up_states = conv(dilations = up_states_dilations_0, groups = up_states_groups_0, pad = up_states_pad_0, pad_type = up_states_pad_type_0, strides = up_states_strides_0, weight = model_model_layers_7_mlp_up_proj_weight_palettized, x = input_107)[name = string("up_states")]; + tensor gate_states = silu(x = input_109)[name = string("gate_states")]; + tensor input = mul(x = gate_states, y = up_states)[name = string("input")]; + string hidden_states_pad_type_0 = const()[name = string("hidden_states_pad_type_0"), val = string("valid")]; + tensor hidden_states_strides_0 = const()[name = string("hidden_states_strides_0"), val = tensor([1, 1])]; + tensor hidden_states_pad_0 = const()[name = string("hidden_states_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor hidden_states_dilations_0 = const()[name = string("hidden_states_dilations_0"), val = tensor([1, 1])]; + int32 hidden_states_groups_0 = const()[name = string("hidden_states_groups_0"), val = int32(1)]; + tensor hidden_states_1 = conv(dilations = hidden_states_dilations_0, groups = hidden_states_groups_0, pad = hidden_states_pad_0, pad_type = hidden_states_pad_type_0, strides = hidden_states_strides_0, weight = model_model_layers_7_mlp_down_proj_weight_palettized, x = input)[name = string("hidden_states")]; + tensor var_1844_axes_0 = const()[name = string("op_1844_axes_0"), val = tensor([2])]; + tensor var_1844 = squeeze(axes = var_1844_axes_0, x = hidden_states_1)[name = string("op_1844")]; + tensor var_1845 = const()[name = string("op_1845"), val = tensor([0, 2, 1])]; + tensor var_1846 = transpose(perm = var_1845, x = var_1844)[name = string("transpose_0")]; + tensor output_hidden_states = add(x = hidden_states_61_cast_fp16, y = var_1846)[name = string("op_1847_cast_fp16")]; + } -> (output_hidden_states); +} \ No newline at end of file