program(1.3) [buildInfo = dict({{"coremlc-component-MIL", "3402.3.2"}, {"coremlc-version", "3402.4.1"}})] { func infer(tensor causal_mask, tensor current_pos, tensor hidden_states, state> model_model_kv_cache_0, tensor position_ids) { tensor model_model_layers_8_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2097280))))[name = string("model_model_layers_8_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_8_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2105536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2629888))))[name = string("model_model_layers_8_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_8_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2632000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3156352))))[name = string("model_model_layers_8_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_8_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3158464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11547136))))[name = string("model_model_layers_8_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_8_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11579968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19968640))))[name = string("model_model_layers_8_mlp_up_proj_weight_palettized")]; tensor model_model_layers_8_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20001472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28390144))))[name = string("model_model_layers_8_mlp_down_proj_weight_palettized")]; tensor model_model_layers_9_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28398400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30495616))))[name = string("model_model_layers_9_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_9_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30503872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31028224))))[name = string("model_model_layers_9_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_9_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31030336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31554688))))[name = string("model_model_layers_9_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_9_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31556800))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39945472))))[name = string("model_model_layers_9_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_9_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39978304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48366976))))[name = string("model_model_layers_9_mlp_up_proj_weight_palettized")]; tensor model_model_layers_9_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48399808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56788480))))[name = string("model_model_layers_9_mlp_down_proj_weight_palettized")]; tensor model_model_layers_10_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56796736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58893952))))[name = string("model_model_layers_10_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_10_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58902208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59426560))))[name = string("model_model_layers_10_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_10_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59428672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59953024))))[name = string("model_model_layers_10_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_10_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59955136))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68343808))))[name = string("model_model_layers_10_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_10_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68376640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76765312))))[name = string("model_model_layers_10_mlp_up_proj_weight_palettized")]; tensor model_model_layers_10_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76798144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85186816))))[name = string("model_model_layers_10_mlp_down_proj_weight_palettized")]; tensor model_model_layers_11_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85195072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87292288))))[name = string("model_model_layers_11_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_11_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87300544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87824896))))[name = string("model_model_layers_11_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_11_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87827008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88351360))))[name = string("model_model_layers_11_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_11_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88353472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96742144))))[name = string("model_model_layers_11_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_11_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96774976))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105163648))))[name = string("model_model_layers_11_mlp_up_proj_weight_palettized")]; tensor model_model_layers_11_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105196480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113585152))))[name = string("model_model_layers_11_mlp_down_proj_weight_palettized")]; tensor model_model_layers_12_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113593408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115690624))))[name = string("model_model_layers_12_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_12_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115698880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116223232))))[name = string("model_model_layers_12_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_12_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116225344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116749696))))[name = string("model_model_layers_12_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_12_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116751808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125140480))))[name = string("model_model_layers_12_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_12_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125173312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133561984))))[name = string("model_model_layers_12_mlp_up_proj_weight_palettized")]; tensor model_model_layers_12_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133594816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141983488))))[name = string("model_model_layers_12_mlp_down_proj_weight_palettized")]; tensor model_model_layers_13_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141991744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144088960))))[name = string("model_model_layers_13_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_13_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144097216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144621568))))[name = string("model_model_layers_13_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_13_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144623680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145148032))))[name = string("model_model_layers_13_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_13_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145150144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153538816))))[name = string("model_model_layers_13_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_13_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153571648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161960320))))[name = string("model_model_layers_13_mlp_up_proj_weight_palettized")]; tensor model_model_layers_13_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161993152))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170381824))))[name = string("model_model_layers_13_mlp_down_proj_weight_palettized")]; tensor model_model_layers_14_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170390080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(172487296))))[name = string("model_model_layers_14_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_14_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(172495552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173019904))))[name = string("model_model_layers_14_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_14_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173022016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173546368))))[name = string("model_model_layers_14_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_14_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173548480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(181937152))))[name = string("model_model_layers_14_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_14_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(181969984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190358656))))[name = string("model_model_layers_14_mlp_up_proj_weight_palettized")]; tensor model_model_layers_14_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190391488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198780160))))[name = string("model_model_layers_14_mlp_down_proj_weight_palettized")]; tensor model_model_layers_15_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198788416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200885632))))[name = string("model_model_layers_15_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_15_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200893888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201418240))))[name = string("model_model_layers_15_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_15_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201420352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201944704))))[name = string("model_model_layers_15_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_15_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201946816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210335488))))[name = string("model_model_layers_15_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_15_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210368320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218756992))))[name = string("model_model_layers_15_mlp_up_proj_weight_palettized")]; tensor model_model_layers_15_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218789824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227178496))))[name = string("model_model_layers_15_mlp_down_proj_weight_palettized")]; int32 var_55 = const()[name = string("op_55"), val = int32(-1)]; int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; tensor greater_equal_0 = greater_equal(x = current_pos, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(131072)]; tensor add_0 = add(x = current_pos, y = slice_by_index_0)[name = string("add_0")]; tensor select_0 = select(a = current_pos, b = add_0, cond = greater_equal_0)[name = string("select_0")]; int32 var_265_axis_0 = const()[name = string("op_265_axis_0"), val = int32(1)]; int32 var_265_batch_dims_0 = const()[name = string("op_265_batch_dims_0"), val = int32(0)]; bool var_265_validate_indices_0 = const()[name = string("op_265_validate_indices_0"), val = bool(false)]; tensor var_60_to_fp16 = const()[name = string("op_60_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227186752)))]; tensor var_265_cast_fp16 = gather(axis = var_265_axis_0, batch_dims = var_265_batch_dims_0, indices = select_0, validate_indices = var_265_validate_indices_0, x = var_60_to_fp16)[name = string("op_265_cast_fp16")]; tensor var_266 = const()[name = string("op_266"), val = tensor([1, 1, 1, -1])]; tensor sin_1_cast_fp16 = reshape(shape = var_266, x = var_265_cast_fp16)[name = string("sin_1_cast_fp16")]; int32 var_270_axis_0 = const()[name = string("op_270_axis_0"), val = int32(1)]; int32 var_270_batch_dims_0 = const()[name = string("op_270_batch_dims_0"), val = int32(0)]; bool var_270_validate_indices_0 = const()[name = string("op_270_validate_indices_0"), val = bool(false)]; tensor var_54_to_fp16 = const()[name = string("op_54_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243964032)))]; tensor var_270_cast_fp16 = gather(axis = var_270_axis_0, batch_dims = var_270_batch_dims_0, indices = select_0, validate_indices = var_270_validate_indices_0, x = var_54_to_fp16)[name = string("op_270_cast_fp16")]; tensor var_271 = const()[name = string("op_271"), val = tensor([1, 1, 1, -1])]; tensor cos_1_cast_fp16 = reshape(shape = var_271, x = var_270_cast_fp16)[name = string("cos_1_cast_fp16")]; tensor mean_1_axes_0 = const()[name = string("mean_1_axes_0"), val = tensor([-1])]; bool mean_1_keep_dims_0 = const()[name = string("mean_1_keep_dims_0"), val = bool(true)]; tensor mean_1_cast_fp16 = reduce_mean(axes = mean_1_axes_0, keep_dims = mean_1_keep_dims_0, x = hidden_states)[name = string("mean_1_cast_fp16")]; tensor input_1_cast_fp16 = sub(x = hidden_states, y = mean_1_cast_fp16)[name = string("input_1_cast_fp16")]; tensor var_279_axes_0 = const()[name = string("op_279_axes_0"), val = tensor([-1])]; tensor model_model_layers_8_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_8_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(260741312)))]; fp16 var_50_to_fp16 = const()[name = string("op_50_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_279_cast_fp16 = layer_norm(axes = var_279_axes_0, epsilon = var_50_to_fp16, gamma = model_model_layers_8_input_layernorm_weight_to_fp16, x = input_1_cast_fp16)[name = string("op_279_cast_fp16")]; tensor var_282 = const()[name = string("op_282"), val = tensor([0, 2, 1])]; tensor var_284_axes_0 = const()[name = string("op_284_axes_0"), val = tensor([2])]; tensor var_283 = transpose(perm = var_282, x = var_279_cast_fp16)[name = string("transpose_31")]; tensor var_284 = expand_dims(axes = var_284_axes_0, x = var_283)[name = string("op_284")]; string var_291_pad_type_0 = const()[name = string("op_291_pad_type_0"), val = string("valid")]; tensor var_291_strides_0 = const()[name = string("op_291_strides_0"), val = tensor([1, 1])]; tensor var_291_pad_0 = const()[name = string("op_291_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_291_dilations_0 = const()[name = string("op_291_dilations_0"), val = tensor([1, 1])]; int32 var_291_groups_0 = const()[name = string("op_291_groups_0"), val = int32(1)]; tensor var_291 = conv(dilations = var_291_dilations_0, groups = var_291_groups_0, pad = var_291_pad_0, pad_type = var_291_pad_type_0, strides = var_291_strides_0, weight = model_model_layers_8_self_attn_q_proj_weight_palettized, x = var_284)[name = string("op_291")]; tensor var_292 = const()[name = string("op_292"), val = tensor([1, 32, 1, 64])]; tensor var_293 = reshape(shape = var_292, x = var_291)[name = string("op_293")]; string var_300_pad_type_0 = const()[name = string("op_300_pad_type_0"), val = string("valid")]; tensor var_300_strides_0 = const()[name = string("op_300_strides_0"), val = tensor([1, 1])]; tensor var_300_pad_0 = const()[name = string("op_300_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_300_dilations_0 = const()[name = string("op_300_dilations_0"), val = tensor([1, 1])]; int32 var_300_groups_0 = const()[name = string("op_300_groups_0"), val = int32(1)]; tensor var_300 = conv(dilations = var_300_dilations_0, groups = var_300_groups_0, pad = var_300_pad_0, pad_type = var_300_pad_type_0, strides = var_300_strides_0, weight = model_model_layers_8_self_attn_k_proj_weight_palettized, x = var_284)[name = string("op_300")]; tensor var_301 = const()[name = string("op_301"), val = tensor([1, 8, 1, 64])]; tensor var_302 = reshape(shape = var_301, x = var_300)[name = string("op_302")]; string var_309_pad_type_0 = const()[name = string("op_309_pad_type_0"), val = string("valid")]; tensor var_309_strides_0 = const()[name = string("op_309_strides_0"), val = tensor([1, 1])]; tensor var_309_pad_0 = const()[name = string("op_309_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_309_dilations_0 = const()[name = string("op_309_dilations_0"), val = tensor([1, 1])]; int32 var_309_groups_0 = const()[name = string("op_309_groups_0"), val = int32(1)]; tensor var_309 = conv(dilations = var_309_dilations_0, groups = var_309_groups_0, pad = var_309_pad_0, pad_type = var_309_pad_type_0, strides = var_309_strides_0, weight = model_model_layers_8_self_attn_v_proj_weight_palettized, x = var_284)[name = string("op_309")]; tensor var_310 = const()[name = string("op_310"), val = tensor([1, 8, 1, 64])]; tensor var_311 = reshape(shape = var_310, x = var_309)[name = string("op_311")]; tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 32, 1, 32])]; tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_1 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = var_293)[name = string("x1_1")]; tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 32, 1, 64])]; tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_1 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = var_293)[name = string("x2_1")]; tensor cos_3_begin_0 = const()[name = string("cos_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor cos_3_end_0 = const()[name = string("cos_3_end_0"), val = tensor([1, 1, 1, 32])]; tensor cos_3_end_mask_0 = const()[name = string("cos_3_end_mask_0"), val = tensor([true, true, true, false])]; tensor cos_3_cast_fp16 = slice_by_index(begin = cos_3_begin_0, end = cos_3_end_0, end_mask = cos_3_end_mask_0, x = cos_1_cast_fp16)[name = string("cos_3_cast_fp16")]; tensor sin_3_begin_0 = const()[name = string("sin_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor sin_3_end_0 = const()[name = string("sin_3_end_0"), val = tensor([1, 1, 1, 32])]; tensor sin_3_end_mask_0 = const()[name = string("sin_3_end_mask_0"), val = tensor([true, true, true, false])]; tensor sin_3_cast_fp16 = slice_by_index(begin = sin_3_begin_0, end = sin_3_end_0, end_mask = sin_3_end_mask_0, x = sin_1_cast_fp16)[name = string("sin_3_cast_fp16")]; tensor var_325_cast_fp16 = mul(x = x1_1, y = cos_3_cast_fp16)[name = string("op_325_cast_fp16")]; tensor var_326_cast_fp16 = mul(x = x2_1, y = sin_3_cast_fp16)[name = string("op_326_cast_fp16")]; tensor var_327_cast_fp16 = sub(x = var_325_cast_fp16, y = var_326_cast_fp16)[name = string("op_327_cast_fp16")]; tensor var_328_cast_fp16 = mul(x = x2_1, y = cos_3_cast_fp16)[name = string("op_328_cast_fp16")]; tensor var_329_cast_fp16 = mul(x = x1_1, y = sin_3_cast_fp16)[name = string("op_329_cast_fp16")]; tensor var_330_cast_fp16 = add(x = var_328_cast_fp16, y = var_329_cast_fp16)[name = string("op_330_cast_fp16")]; bool rotated_1_interleave_0 = const()[name = string("rotated_1_interleave_0"), val = bool(false)]; tensor rotated_1_cast_fp16 = concat(axis = var_55, interleave = rotated_1_interleave_0, values = (var_327_cast_fp16, var_330_cast_fp16))[name = string("rotated_1_cast_fp16")]; tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 8, 1, 32])]; tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_3 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = var_302)[name = string("x1_3")]; tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 8, 1, 64])]; tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_3 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = var_302)[name = string("x2_3")]; tensor var_346_cast_fp16 = mul(x = x1_3, y = cos_3_cast_fp16)[name = string("op_346_cast_fp16")]; tensor var_347_cast_fp16 = mul(x = x2_3, y = sin_3_cast_fp16)[name = string("op_347_cast_fp16")]; tensor var_348_cast_fp16 = sub(x = var_346_cast_fp16, y = var_347_cast_fp16)[name = string("op_348_cast_fp16")]; tensor var_349_cast_fp16 = mul(x = x2_3, y = cos_3_cast_fp16)[name = string("op_349_cast_fp16")]; tensor var_350_cast_fp16 = mul(x = x1_3, y = sin_3_cast_fp16)[name = string("op_350_cast_fp16")]; tensor var_351_cast_fp16 = add(x = var_349_cast_fp16, y = var_350_cast_fp16)[name = string("op_351_cast_fp16")]; bool rotated_3_interleave_0 = const()[name = string("rotated_3_interleave_0"), val = bool(false)]; tensor rotated_3_cast_fp16 = concat(axis = var_55, interleave = rotated_3_interleave_0, values = (var_348_cast_fp16, var_351_cast_fp16))[name = string("rotated_3_cast_fp16")]; int32 var_355 = const()[name = string("op_355"), val = int32(1)]; tensor var_356 = add(x = current_pos, y = var_355)[name = string("op_356")]; tensor read_state_0 = read_state(input = model_model_kv_cache_0)[name = string("read_state_0")]; tensor expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor([8])]; tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor([0])]; tensor expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor([0])]; tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor([9])]; int32 concat_2_axis_0 = const()[name = string("concat_2_axis_0"), val = int32(0)]; bool concat_2_interleave_0 = const()[name = string("concat_2_interleave_0"), val = bool(false)]; tensor concat_2 = concat(axis = concat_2_axis_0, interleave = concat_2_interleave_0, values = (expand_dims_0, expand_dims_1, current_pos, expand_dims_3))[name = string("concat_2")]; tensor concat_3_values1_0 = const()[name = string("concat_3_values1_0"), val = tensor([0])]; tensor concat_3_values3_0 = const()[name = string("concat_3_values3_0"), val = tensor([0])]; int32 concat_3_axis_0 = const()[name = string("concat_3_axis_0"), val = int32(0)]; bool concat_3_interleave_0 = const()[name = string("concat_3_interleave_0"), val = bool(false)]; tensor concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (expand_dims_4, concat_3_values1_0, var_356, concat_3_values3_0))[name = string("concat_3")]; tensor model_model_kv_cache_0_internal_tensor_assign_1_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_1_stride_0, update = rotated_3_cast_fp16, x = read_state_0)[name = string("model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_0_write_state")]; tensor coreml_update_state_16 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_0")]; tensor expand_dims_6 = const()[name = string("expand_dims_6"), val = tensor([24])]; tensor expand_dims_7 = const()[name = string("expand_dims_7"), val = tensor([0])]; tensor expand_dims_9 = const()[name = string("expand_dims_9"), val = tensor([0])]; tensor expand_dims_10 = const()[name = string("expand_dims_10"), val = tensor([25])]; int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)]; bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)]; tensor concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (expand_dims_6, expand_dims_7, current_pos, expand_dims_9))[name = string("concat_6")]; tensor concat_7_values1_0 = const()[name = string("concat_7_values1_0"), val = tensor([0])]; tensor concat_7_values3_0 = const()[name = string("concat_7_values3_0"), val = tensor([0])]; int32 concat_7_axis_0 = const()[name = string("concat_7_axis_0"), val = int32(0)]; bool concat_7_interleave_0 = const()[name = string("concat_7_interleave_0"), val = bool(false)]; tensor concat_7 = concat(axis = concat_7_axis_0, interleave = concat_7_interleave_0, values = (expand_dims_10, concat_7_values1_0, var_356, concat_7_values3_0))[name = string("concat_7")]; tensor model_model_kv_cache_0_internal_tensor_assign_2_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_6, begin_mask = model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0, end = concat_7, end_mask = model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_2_stride_0, update = var_311, x = coreml_update_state_16)[name = string("model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_1_write_state")]; tensor coreml_update_state_17 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_1")]; tensor var_371_begin_0 = const()[name = string("op_371_begin_0"), val = tensor([8, 0, 0, 0])]; tensor var_371_end_0 = const()[name = string("op_371_end_0"), val = tensor([9, 8, 1546, 64])]; tensor var_371_end_mask_0 = const()[name = string("op_371_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_371_cast_fp16 = slice_by_index(begin = var_371_begin_0, end = var_371_end_0, end_mask = var_371_end_mask_0, x = coreml_update_state_17)[name = string("op_371_cast_fp16")]; tensor K_layer_cache_1_axes_0 = const()[name = string("K_layer_cache_1_axes_0"), val = tensor([0])]; tensor K_layer_cache_1_cast_fp16 = squeeze(axes = K_layer_cache_1_axes_0, x = var_371_cast_fp16)[name = string("K_layer_cache_1_cast_fp16")]; tensor var_373_begin_0 = const()[name = string("op_373_begin_0"), val = tensor([24, 0, 0, 0])]; tensor var_373_end_0 = const()[name = string("op_373_end_0"), val = tensor([25, 8, 1546, 64])]; tensor var_373_end_mask_0 = const()[name = string("op_373_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_373_cast_fp16 = slice_by_index(begin = var_373_begin_0, end = var_373_end_0, end_mask = var_373_end_mask_0, x = coreml_update_state_17)[name = string("op_373_cast_fp16")]; tensor V_layer_cache_1_axes_0 = const()[name = string("V_layer_cache_1_axes_0"), val = tensor([0])]; tensor V_layer_cache_1_cast_fp16 = squeeze(axes = V_layer_cache_1_axes_0, x = var_373_cast_fp16)[name = string("V_layer_cache_1_cast_fp16")]; tensor x_11_axes_0 = const()[name = string("x_11_axes_0"), val = tensor([1])]; tensor x_11_cast_fp16 = expand_dims(axes = x_11_axes_0, x = K_layer_cache_1_cast_fp16)[name = string("x_11_cast_fp16")]; tensor var_382 = const()[name = string("op_382"), val = tensor([1, 4, 1, 1])]; tensor x_13_cast_fp16 = tile(reps = var_382, x = x_11_cast_fp16)[name = string("x_13_cast_fp16")]; tensor var_386 = const()[name = string("op_386"), val = tensor([1, -1, 1546, 64])]; tensor key_states_3_cast_fp16 = reshape(shape = var_386, x = x_13_cast_fp16)[name = string("key_states_3_cast_fp16")]; tensor x_17_axes_0 = const()[name = string("x_17_axes_0"), val = tensor([1])]; tensor x_17_cast_fp16 = expand_dims(axes = x_17_axes_0, x = V_layer_cache_1_cast_fp16)[name = string("x_17_cast_fp16")]; tensor var_389 = const()[name = string("op_389"), val = tensor([1, 4, 1, 1])]; tensor x_19_cast_fp16 = tile(reps = var_389, x = x_17_cast_fp16)[name = string("x_19_cast_fp16")]; tensor var_393 = const()[name = string("op_393"), val = tensor([1, -1, 1546, 64])]; tensor value_states_3_cast_fp16 = reshape(shape = var_393, x = x_19_cast_fp16)[name = string("value_states_3_cast_fp16")]; bool var_396_transpose_x_1 = const()[name = string("op_396_transpose_x_1"), val = bool(false)]; bool var_396_transpose_y_1 = const()[name = string("op_396_transpose_y_1"), val = bool(true)]; tensor var_396_cast_fp16 = matmul(transpose_x = var_396_transpose_x_1, transpose_y = var_396_transpose_y_1, x = rotated_1_cast_fp16, y = key_states_3_cast_fp16)[name = string("op_396_cast_fp16")]; fp16 var_397_to_fp16 = const()[name = string("op_397_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_1_cast_fp16 = mul(x = var_396_cast_fp16, y = var_397_to_fp16)[name = string("attn_weights_1_cast_fp16")]; tensor x_21_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = causal_mask)[name = string("x_21_cast_fp16")]; tensor reduce_max_0_axes_0 = const()[name = string("reduce_max_0_axes_0"), val = tensor([-1])]; bool reduce_max_0_keep_dims_0 = const()[name = string("reduce_max_0_keep_dims_0"), val = bool(true)]; tensor reduce_max_0_cast_fp16 = reduce_max(axes = reduce_max_0_axes_0, keep_dims = reduce_max_0_keep_dims_0, x = x_21_cast_fp16)[name = string("reduce_max_0_cast_fp16")]; tensor x_23_cast_fp16 = sub(x = x_21_cast_fp16, y = reduce_max_0_cast_fp16)[name = string("x_23_cast_fp16")]; tensor exp_x_1_cast_fp16 = exp(x = x_23_cast_fp16)[name = string("exp_x_1_cast_fp16")]; tensor var_408_axes_0 = const()[name = string("op_408_axes_0"), val = tensor([-1])]; bool var_408_keep_dims_0 = const()[name = string("op_408_keep_dims_0"), val = bool(true)]; tensor var_408_cast_fp16 = reduce_sum(axes = var_408_axes_0, keep_dims = var_408_keep_dims_0, x = exp_x_1_cast_fp16)[name = string("op_408_cast_fp16")]; tensor attn_weights_3_cast_fp16 = real_div(x = exp_x_1_cast_fp16, y = var_408_cast_fp16)[name = string("attn_weights_3_cast_fp16")]; bool attn_output_1_transpose_x_0 = const()[name = string("attn_output_1_transpose_x_0"), val = bool(false)]; bool attn_output_1_transpose_y_0 = const()[name = string("attn_output_1_transpose_y_0"), val = bool(false)]; tensor attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_0, transpose_y = attn_output_1_transpose_y_0, x = attn_weights_3_cast_fp16, y = value_states_3_cast_fp16)[name = string("attn_output_1_cast_fp16")]; tensor var_411_perm_0 = const()[name = string("op_411_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_413 = const()[name = string("op_413"), val = tensor([1, 1, 2048])]; tensor var_411_cast_fp16 = transpose(perm = var_411_perm_0, x = attn_output_1_cast_fp16)[name = string("transpose_30")]; tensor input_5_cast_fp16 = reshape(shape = var_413, x = var_411_cast_fp16)[name = string("input_5_cast_fp16")]; tensor model_model_layers_8_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(260745472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262842688))))[name = string("model_model_layers_8_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262850944)))]; tensor linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_8_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_5_cast_fp16)[name = string("linear_0_cast_fp16")]; tensor hidden_states_5_cast_fp16 = add(x = hidden_states, y = linear_0_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; tensor mean_3_axes_0 = const()[name = string("mean_3_axes_0"), val = tensor([-1])]; bool mean_3_keep_dims_0 = const()[name = string("mean_3_keep_dims_0"), val = bool(true)]; tensor mean_3_cast_fp16 = reduce_mean(axes = mean_3_axes_0, keep_dims = mean_3_keep_dims_0, x = hidden_states_5_cast_fp16)[name = string("mean_3_cast_fp16")]; tensor input_7_cast_fp16 = sub(x = hidden_states_5_cast_fp16, y = mean_3_cast_fp16)[name = string("input_7_cast_fp16")]; tensor var_424_axes_0 = const()[name = string("op_424_axes_0"), val = tensor([-1])]; tensor model_model_layers_8_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_8_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262855104)))]; tensor var_424_cast_fp16 = layer_norm(axes = var_424_axes_0, epsilon = var_50_to_fp16, gamma = model_model_layers_8_post_attention_layernorm_weight_to_fp16, x = input_7_cast_fp16)[name = string("op_424_cast_fp16")]; tensor var_431 = const()[name = string("op_431"), val = tensor([0, 2, 1])]; tensor input_9_axes_0 = const()[name = string("input_9_axes_0"), val = tensor([2])]; tensor var_432 = transpose(perm = var_431, x = var_424_cast_fp16)[name = string("transpose_29")]; tensor input_9 = expand_dims(axes = input_9_axes_0, x = var_432)[name = string("input_9")]; string input_11_pad_type_0 = const()[name = string("input_11_pad_type_0"), val = string("valid")]; tensor input_11_strides_0 = const()[name = string("input_11_strides_0"), val = tensor([1, 1])]; tensor input_11_pad_0 = const()[name = string("input_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_11_dilations_0 = const()[name = string("input_11_dilations_0"), val = tensor([1, 1])]; int32 input_11_groups_0 = const()[name = string("input_11_groups_0"), val = int32(1)]; tensor input_11 = conv(dilations = input_11_dilations_0, groups = input_11_groups_0, pad = input_11_pad_0, pad_type = input_11_pad_type_0, strides = input_11_strides_0, weight = model_model_layers_8_mlp_gate_proj_weight_palettized, x = input_9)[name = string("input_11")]; string up_states_1_pad_type_0 = const()[name = string("up_states_1_pad_type_0"), val = string("valid")]; tensor up_states_1_strides_0 = const()[name = string("up_states_1_strides_0"), val = tensor([1, 1])]; tensor up_states_1_pad_0 = const()[name = string("up_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_1_dilations_0 = const()[name = string("up_states_1_dilations_0"), val = tensor([1, 1])]; int32 up_states_1_groups_0 = const()[name = string("up_states_1_groups_0"), val = int32(1)]; tensor up_states_1 = conv(dilations = up_states_1_dilations_0, groups = up_states_1_groups_0, pad = up_states_1_pad_0, pad_type = up_states_1_pad_type_0, strides = up_states_1_strides_0, weight = model_model_layers_8_mlp_up_proj_weight_palettized, x = input_9)[name = string("up_states_1")]; tensor gate_states_1 = silu(x = input_11)[name = string("gate_states_1")]; tensor input_13 = mul(x = gate_states_1, y = up_states_1)[name = string("input_13")]; string hidden_states_7_pad_type_0 = const()[name = string("hidden_states_7_pad_type_0"), val = string("valid")]; tensor hidden_states_7_strides_0 = const()[name = string("hidden_states_7_strides_0"), val = tensor([1, 1])]; tensor hidden_states_7_pad_0 = const()[name = string("hidden_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_7_dilations_0 = const()[name = string("hidden_states_7_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_7_groups_0 = const()[name = string("hidden_states_7_groups_0"), val = int32(1)]; tensor hidden_states_7 = conv(dilations = hidden_states_7_dilations_0, groups = hidden_states_7_groups_0, pad = hidden_states_7_pad_0, pad_type = hidden_states_7_pad_type_0, strides = hidden_states_7_strides_0, weight = model_model_layers_8_mlp_down_proj_weight_palettized, x = input_13)[name = string("hidden_states_7")]; tensor var_454_axes_0 = const()[name = string("op_454_axes_0"), val = tensor([2])]; tensor var_454 = squeeze(axes = var_454_axes_0, x = hidden_states_7)[name = string("op_454")]; tensor var_455 = const()[name = string("op_455"), val = tensor([0, 2, 1])]; tensor var_456 = transpose(perm = var_455, x = var_454)[name = string("transpose_28")]; tensor hidden_states_9_cast_fp16 = add(x = hidden_states_5_cast_fp16, y = var_456)[name = string("hidden_states_9_cast_fp16")]; tensor mean_5_axes_0 = const()[name = string("mean_5_axes_0"), val = tensor([-1])]; bool mean_5_keep_dims_0 = const()[name = string("mean_5_keep_dims_0"), val = bool(true)]; tensor mean_5_cast_fp16 = reduce_mean(axes = mean_5_axes_0, keep_dims = mean_5_keep_dims_0, x = hidden_states_9_cast_fp16)[name = string("mean_5_cast_fp16")]; tensor input_15_cast_fp16 = sub(x = hidden_states_9_cast_fp16, y = mean_5_cast_fp16)[name = string("input_15_cast_fp16")]; tensor var_464_axes_0 = const()[name = string("op_464_axes_0"), val = tensor([-1])]; tensor model_model_layers_9_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_9_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262859264)))]; tensor var_464_cast_fp16 = layer_norm(axes = var_464_axes_0, epsilon = var_50_to_fp16, gamma = model_model_layers_9_input_layernorm_weight_to_fp16, x = input_15_cast_fp16)[name = string("op_464_cast_fp16")]; tensor var_467 = const()[name = string("op_467"), val = tensor([0, 2, 1])]; tensor var_469_axes_0 = const()[name = string("op_469_axes_0"), val = tensor([2])]; tensor var_468 = transpose(perm = var_467, x = var_464_cast_fp16)[name = string("transpose_27")]; tensor var_469 = expand_dims(axes = var_469_axes_0, x = var_468)[name = string("op_469")]; string var_476_pad_type_0 = const()[name = string("op_476_pad_type_0"), val = string("valid")]; tensor var_476_strides_0 = const()[name = string("op_476_strides_0"), val = tensor([1, 1])]; tensor var_476_pad_0 = const()[name = string("op_476_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_476_dilations_0 = const()[name = string("op_476_dilations_0"), val = tensor([1, 1])]; int32 var_476_groups_0 = const()[name = string("op_476_groups_0"), val = int32(1)]; tensor var_476 = conv(dilations = var_476_dilations_0, groups = var_476_groups_0, pad = var_476_pad_0, pad_type = var_476_pad_type_0, strides = var_476_strides_0, weight = model_model_layers_9_self_attn_q_proj_weight_palettized, x = var_469)[name = string("op_476")]; tensor var_477 = const()[name = string("op_477"), val = tensor([1, 32, 1, 64])]; tensor var_478 = reshape(shape = var_477, x = var_476)[name = string("op_478")]; string var_485_pad_type_0 = const()[name = string("op_485_pad_type_0"), val = string("valid")]; tensor var_485_strides_0 = const()[name = string("op_485_strides_0"), val = tensor([1, 1])]; tensor var_485_pad_0 = const()[name = string("op_485_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_485_dilations_0 = const()[name = string("op_485_dilations_0"), val = tensor([1, 1])]; int32 var_485_groups_0 = const()[name = string("op_485_groups_0"), val = int32(1)]; tensor var_485 = conv(dilations = var_485_dilations_0, groups = var_485_groups_0, pad = var_485_pad_0, pad_type = var_485_pad_type_0, strides = var_485_strides_0, weight = model_model_layers_9_self_attn_k_proj_weight_palettized, x = var_469)[name = string("op_485")]; tensor var_486 = const()[name = string("op_486"), val = tensor([1, 8, 1, 64])]; tensor var_487 = reshape(shape = var_486, x = var_485)[name = string("op_487")]; string var_494_pad_type_0 = const()[name = string("op_494_pad_type_0"), val = string("valid")]; tensor var_494_strides_0 = const()[name = string("op_494_strides_0"), val = tensor([1, 1])]; tensor var_494_pad_0 = const()[name = string("op_494_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_494_dilations_0 = const()[name = string("op_494_dilations_0"), val = tensor([1, 1])]; int32 var_494_groups_0 = const()[name = string("op_494_groups_0"), val = int32(1)]; tensor var_494 = conv(dilations = var_494_dilations_0, groups = var_494_groups_0, pad = var_494_pad_0, pad_type = var_494_pad_type_0, strides = var_494_strides_0, weight = model_model_layers_9_self_attn_v_proj_weight_palettized, x = var_469)[name = string("op_494")]; tensor var_495 = const()[name = string("op_495"), val = tensor([1, 8, 1, 64])]; tensor var_496 = reshape(shape = var_495, x = var_494)[name = string("op_496")]; tensor x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor([1, 32, 1, 32])]; tensor x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_5 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = var_478)[name = string("x1_5")]; tensor x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor([1, 32, 1, 64])]; tensor x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_5 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = var_478)[name = string("x2_5")]; tensor var_510_cast_fp16 = mul(x = x1_5, y = cos_3_cast_fp16)[name = string("op_510_cast_fp16")]; tensor var_511_cast_fp16 = mul(x = x2_5, y = sin_3_cast_fp16)[name = string("op_511_cast_fp16")]; tensor var_512_cast_fp16 = sub(x = var_510_cast_fp16, y = var_511_cast_fp16)[name = string("op_512_cast_fp16")]; tensor var_513_cast_fp16 = mul(x = x2_5, y = cos_3_cast_fp16)[name = string("op_513_cast_fp16")]; tensor var_514_cast_fp16 = mul(x = x1_5, y = sin_3_cast_fp16)[name = string("op_514_cast_fp16")]; tensor var_515_cast_fp16 = add(x = var_513_cast_fp16, y = var_514_cast_fp16)[name = string("op_515_cast_fp16")]; bool rotated_5_interleave_0 = const()[name = string("rotated_5_interleave_0"), val = bool(false)]; tensor rotated_5_cast_fp16 = concat(axis = var_55, interleave = rotated_5_interleave_0, values = (var_512_cast_fp16, var_515_cast_fp16))[name = string("rotated_5_cast_fp16")]; tensor x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor([1, 8, 1, 32])]; tensor x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_7 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = var_487)[name = string("x1_7")]; tensor x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor([1, 8, 1, 64])]; tensor x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_7 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = var_487)[name = string("x2_7")]; tensor var_531_cast_fp16 = mul(x = x1_7, y = cos_3_cast_fp16)[name = string("op_531_cast_fp16")]; tensor var_532_cast_fp16 = mul(x = x2_7, y = sin_3_cast_fp16)[name = string("op_532_cast_fp16")]; tensor var_533_cast_fp16 = sub(x = var_531_cast_fp16, y = var_532_cast_fp16)[name = string("op_533_cast_fp16")]; tensor var_534_cast_fp16 = mul(x = x2_7, y = cos_3_cast_fp16)[name = string("op_534_cast_fp16")]; tensor var_535_cast_fp16 = mul(x = x1_7, y = sin_3_cast_fp16)[name = string("op_535_cast_fp16")]; tensor var_536_cast_fp16 = add(x = var_534_cast_fp16, y = var_535_cast_fp16)[name = string("op_536_cast_fp16")]; bool rotated_7_interleave_0 = const()[name = string("rotated_7_interleave_0"), val = bool(false)]; tensor rotated_7_cast_fp16 = concat(axis = var_55, interleave = rotated_7_interleave_0, values = (var_533_cast_fp16, var_536_cast_fp16))[name = string("rotated_7_cast_fp16")]; tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([9])]; tensor expand_dims_13 = const()[name = string("expand_dims_13"), val = tensor([0])]; tensor expand_dims_15 = const()[name = string("expand_dims_15"), val = tensor([0])]; tensor expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor([10])]; int32 concat_10_axis_0 = const()[name = string("concat_10_axis_0"), val = int32(0)]; bool concat_10_interleave_0 = const()[name = string("concat_10_interleave_0"), val = bool(false)]; tensor concat_10 = concat(axis = concat_10_axis_0, interleave = concat_10_interleave_0, values = (expand_dims_12, expand_dims_13, current_pos, expand_dims_15))[name = string("concat_10")]; tensor concat_11_values1_0 = const()[name = string("concat_11_values1_0"), val = tensor([0])]; tensor concat_11_values3_0 = const()[name = string("concat_11_values3_0"), val = tensor([0])]; int32 concat_11_axis_0 = const()[name = string("concat_11_axis_0"), val = int32(0)]; bool concat_11_interleave_0 = const()[name = string("concat_11_interleave_0"), val = bool(false)]; tensor concat_11 = concat(axis = concat_11_axis_0, interleave = concat_11_interleave_0, values = (expand_dims_16, concat_11_values1_0, var_356, concat_11_values3_0))[name = string("concat_11")]; tensor model_model_kv_cache_0_internal_tensor_assign_3_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_10, begin_mask = model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0, end = concat_11, end_mask = model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_3_stride_0, update = rotated_7_cast_fp16, x = coreml_update_state_17)[name = string("model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_2_write_state")]; tensor coreml_update_state_18 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_2")]; tensor expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor([25])]; tensor expand_dims_19 = const()[name = string("expand_dims_19"), val = tensor([0])]; tensor expand_dims_21 = const()[name = string("expand_dims_21"), val = tensor([0])]; tensor expand_dims_22 = const()[name = string("expand_dims_22"), val = tensor([26])]; int32 concat_14_axis_0 = const()[name = string("concat_14_axis_0"), val = int32(0)]; bool concat_14_interleave_0 = const()[name = string("concat_14_interleave_0"), val = bool(false)]; tensor concat_14 = concat(axis = concat_14_axis_0, interleave = concat_14_interleave_0, values = (expand_dims_18, expand_dims_19, current_pos, expand_dims_21))[name = string("concat_14")]; tensor concat_15_values1_0 = const()[name = string("concat_15_values1_0"), val = tensor([0])]; tensor concat_15_values3_0 = const()[name = string("concat_15_values3_0"), val = tensor([0])]; int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)]; bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)]; tensor concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (expand_dims_22, concat_15_values1_0, var_356, concat_15_values3_0))[name = string("concat_15")]; tensor model_model_kv_cache_0_internal_tensor_assign_4_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_14, begin_mask = model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0, end = concat_15, end_mask = model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_4_stride_0, update = var_496, x = coreml_update_state_18)[name = string("model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_3_write_state")]; tensor coreml_update_state_19 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_3")]; tensor var_556_begin_0 = const()[name = string("op_556_begin_0"), val = tensor([9, 0, 0, 0])]; tensor var_556_end_0 = const()[name = string("op_556_end_0"), val = tensor([10, 8, 1546, 64])]; tensor var_556_end_mask_0 = const()[name = string("op_556_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_556_cast_fp16 = slice_by_index(begin = var_556_begin_0, end = var_556_end_0, end_mask = var_556_end_mask_0, x = coreml_update_state_19)[name = string("op_556_cast_fp16")]; tensor K_layer_cache_3_axes_0 = const()[name = string("K_layer_cache_3_axes_0"), val = tensor([0])]; tensor K_layer_cache_3_cast_fp16 = squeeze(axes = K_layer_cache_3_axes_0, x = var_556_cast_fp16)[name = string("K_layer_cache_3_cast_fp16")]; tensor var_558_begin_0 = const()[name = string("op_558_begin_0"), val = tensor([25, 0, 0, 0])]; tensor var_558_end_0 = const()[name = string("op_558_end_0"), val = tensor([26, 8, 1546, 64])]; tensor var_558_end_mask_0 = const()[name = string("op_558_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_558_cast_fp16 = slice_by_index(begin = var_558_begin_0, end = var_558_end_0, end_mask = var_558_end_mask_0, x = coreml_update_state_19)[name = string("op_558_cast_fp16")]; tensor V_layer_cache_3_axes_0 = const()[name = string("V_layer_cache_3_axes_0"), val = tensor([0])]; tensor V_layer_cache_3_cast_fp16 = squeeze(axes = V_layer_cache_3_axes_0, x = var_558_cast_fp16)[name = string("V_layer_cache_3_cast_fp16")]; tensor x_39_axes_0 = const()[name = string("x_39_axes_0"), val = tensor([1])]; tensor x_39_cast_fp16 = expand_dims(axes = x_39_axes_0, x = K_layer_cache_3_cast_fp16)[name = string("x_39_cast_fp16")]; tensor var_567 = const()[name = string("op_567"), val = tensor([1, 4, 1, 1])]; tensor x_41_cast_fp16 = tile(reps = var_567, x = x_39_cast_fp16)[name = string("x_41_cast_fp16")]; tensor var_571 = const()[name = string("op_571"), val = tensor([1, -1, 1546, 64])]; tensor key_states_7_cast_fp16 = reshape(shape = var_571, x = x_41_cast_fp16)[name = string("key_states_7_cast_fp16")]; tensor x_45_axes_0 = const()[name = string("x_45_axes_0"), val = tensor([1])]; tensor x_45_cast_fp16 = expand_dims(axes = x_45_axes_0, x = V_layer_cache_3_cast_fp16)[name = string("x_45_cast_fp16")]; tensor var_574 = const()[name = string("op_574"), val = tensor([1, 4, 1, 1])]; tensor x_47_cast_fp16 = tile(reps = var_574, x = x_45_cast_fp16)[name = string("x_47_cast_fp16")]; tensor var_578 = const()[name = string("op_578"), val = tensor([1, -1, 1546, 64])]; tensor value_states_7_cast_fp16 = reshape(shape = var_578, x = x_47_cast_fp16)[name = string("value_states_7_cast_fp16")]; bool var_581_transpose_x_1 = const()[name = string("op_581_transpose_x_1"), val = bool(false)]; bool var_581_transpose_y_1 = const()[name = string("op_581_transpose_y_1"), val = bool(true)]; tensor var_581_cast_fp16 = matmul(transpose_x = var_581_transpose_x_1, transpose_y = var_581_transpose_y_1, x = rotated_5_cast_fp16, y = key_states_7_cast_fp16)[name = string("op_581_cast_fp16")]; fp16 var_582_to_fp16 = const()[name = string("op_582_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_5_cast_fp16 = mul(x = var_581_cast_fp16, y = var_582_to_fp16)[name = string("attn_weights_5_cast_fp16")]; tensor x_49_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = causal_mask)[name = string("x_49_cast_fp16")]; tensor reduce_max_1_axes_0 = const()[name = string("reduce_max_1_axes_0"), val = tensor([-1])]; bool reduce_max_1_keep_dims_0 = const()[name = string("reduce_max_1_keep_dims_0"), val = bool(true)]; tensor reduce_max_1_cast_fp16 = reduce_max(axes = reduce_max_1_axes_0, keep_dims = reduce_max_1_keep_dims_0, x = x_49_cast_fp16)[name = string("reduce_max_1_cast_fp16")]; tensor x_51_cast_fp16 = sub(x = x_49_cast_fp16, y = reduce_max_1_cast_fp16)[name = string("x_51_cast_fp16")]; tensor exp_x_3_cast_fp16 = exp(x = x_51_cast_fp16)[name = string("exp_x_3_cast_fp16")]; tensor var_593_axes_0 = const()[name = string("op_593_axes_0"), val = tensor([-1])]; bool var_593_keep_dims_0 = const()[name = string("op_593_keep_dims_0"), val = bool(true)]; tensor var_593_cast_fp16 = reduce_sum(axes = var_593_axes_0, keep_dims = var_593_keep_dims_0, x = exp_x_3_cast_fp16)[name = string("op_593_cast_fp16")]; tensor attn_weights_7_cast_fp16 = real_div(x = exp_x_3_cast_fp16, y = var_593_cast_fp16)[name = string("attn_weights_7_cast_fp16")]; bool attn_output_7_transpose_x_0 = const()[name = string("attn_output_7_transpose_x_0"), val = bool(false)]; bool attn_output_7_transpose_y_0 = const()[name = string("attn_output_7_transpose_y_0"), val = bool(false)]; tensor attn_output_7_cast_fp16 = matmul(transpose_x = attn_output_7_transpose_x_0, transpose_y = attn_output_7_transpose_y_0, x = attn_weights_7_cast_fp16, y = value_states_7_cast_fp16)[name = string("attn_output_7_cast_fp16")]; tensor var_596_perm_0 = const()[name = string("op_596_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_598 = const()[name = string("op_598"), val = tensor([1, 1, 2048])]; tensor var_596_cast_fp16 = transpose(perm = var_596_perm_0, x = attn_output_7_cast_fp16)[name = string("transpose_26")]; tensor input_19_cast_fp16 = reshape(shape = var_598, x = var_596_cast_fp16)[name = string("input_19_cast_fp16")]; tensor model_model_layers_9_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262863424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(264960640))))[name = string("model_model_layers_9_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_1_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_9_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_19_cast_fp16)[name = string("linear_1_cast_fp16")]; tensor hidden_states_13_cast_fp16 = add(x = hidden_states_9_cast_fp16, y = linear_1_cast_fp16)[name = string("hidden_states_13_cast_fp16")]; tensor mean_7_axes_0 = const()[name = string("mean_7_axes_0"), val = tensor([-1])]; bool mean_7_keep_dims_0 = const()[name = string("mean_7_keep_dims_0"), val = bool(true)]; tensor mean_7_cast_fp16 = reduce_mean(axes = mean_7_axes_0, keep_dims = mean_7_keep_dims_0, x = hidden_states_13_cast_fp16)[name = string("mean_7_cast_fp16")]; tensor input_21_cast_fp16 = sub(x = hidden_states_13_cast_fp16, y = mean_7_cast_fp16)[name = string("input_21_cast_fp16")]; tensor var_609_axes_0 = const()[name = string("op_609_axes_0"), val = tensor([-1])]; tensor model_model_layers_9_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_9_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(264968896)))]; tensor var_609_cast_fp16 = layer_norm(axes = var_609_axes_0, epsilon = var_50_to_fp16, gamma = model_model_layers_9_post_attention_layernorm_weight_to_fp16, x = input_21_cast_fp16)[name = string("op_609_cast_fp16")]; tensor var_616 = const()[name = string("op_616"), val = tensor([0, 2, 1])]; tensor input_23_axes_0 = const()[name = string("input_23_axes_0"), val = tensor([2])]; tensor var_617 = transpose(perm = var_616, x = var_609_cast_fp16)[name = string("transpose_25")]; tensor input_23 = expand_dims(axes = input_23_axes_0, x = var_617)[name = string("input_23")]; string input_25_pad_type_0 = const()[name = string("input_25_pad_type_0"), val = string("valid")]; tensor input_25_strides_0 = const()[name = string("input_25_strides_0"), val = tensor([1, 1])]; tensor input_25_pad_0 = const()[name = string("input_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_25_dilations_0 = const()[name = string("input_25_dilations_0"), val = tensor([1, 1])]; int32 input_25_groups_0 = const()[name = string("input_25_groups_0"), val = int32(1)]; tensor input_25 = conv(dilations = input_25_dilations_0, groups = input_25_groups_0, pad = input_25_pad_0, pad_type = input_25_pad_type_0, strides = input_25_strides_0, weight = model_model_layers_9_mlp_gate_proj_weight_palettized, x = input_23)[name = string("input_25")]; string up_states_3_pad_type_0 = const()[name = string("up_states_3_pad_type_0"), val = string("valid")]; tensor up_states_3_strides_0 = const()[name = string("up_states_3_strides_0"), val = tensor([1, 1])]; tensor up_states_3_pad_0 = const()[name = string("up_states_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_3_dilations_0 = const()[name = string("up_states_3_dilations_0"), val = tensor([1, 1])]; int32 up_states_3_groups_0 = const()[name = string("up_states_3_groups_0"), val = int32(1)]; tensor up_states_3 = conv(dilations = up_states_3_dilations_0, groups = up_states_3_groups_0, pad = up_states_3_pad_0, pad_type = up_states_3_pad_type_0, strides = up_states_3_strides_0, weight = model_model_layers_9_mlp_up_proj_weight_palettized, x = input_23)[name = string("up_states_3")]; tensor gate_states_3 = silu(x = input_25)[name = string("gate_states_3")]; tensor input_27 = mul(x = gate_states_3, y = up_states_3)[name = string("input_27")]; string hidden_states_15_pad_type_0 = const()[name = string("hidden_states_15_pad_type_0"), val = string("valid")]; tensor hidden_states_15_strides_0 = const()[name = string("hidden_states_15_strides_0"), val = tensor([1, 1])]; tensor hidden_states_15_pad_0 = const()[name = string("hidden_states_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_15_dilations_0 = const()[name = string("hidden_states_15_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_15_groups_0 = const()[name = string("hidden_states_15_groups_0"), val = int32(1)]; tensor hidden_states_15 = conv(dilations = hidden_states_15_dilations_0, groups = hidden_states_15_groups_0, pad = hidden_states_15_pad_0, pad_type = hidden_states_15_pad_type_0, strides = hidden_states_15_strides_0, weight = model_model_layers_9_mlp_down_proj_weight_palettized, x = input_27)[name = string("hidden_states_15")]; tensor var_639_axes_0 = const()[name = string("op_639_axes_0"), val = tensor([2])]; tensor var_639 = squeeze(axes = var_639_axes_0, x = hidden_states_15)[name = string("op_639")]; tensor var_640 = const()[name = string("op_640"), val = tensor([0, 2, 1])]; tensor var_641 = transpose(perm = var_640, x = var_639)[name = string("transpose_24")]; tensor hidden_states_17_cast_fp16 = add(x = hidden_states_13_cast_fp16, y = var_641)[name = string("hidden_states_17_cast_fp16")]; tensor mean_9_axes_0 = const()[name = string("mean_9_axes_0"), val = tensor([-1])]; bool mean_9_keep_dims_0 = const()[name = string("mean_9_keep_dims_0"), val = bool(true)]; tensor mean_9_cast_fp16 = reduce_mean(axes = mean_9_axes_0, keep_dims = mean_9_keep_dims_0, x = hidden_states_17_cast_fp16)[name = string("mean_9_cast_fp16")]; tensor input_29_cast_fp16 = sub(x = hidden_states_17_cast_fp16, y = mean_9_cast_fp16)[name = string("input_29_cast_fp16")]; tensor var_649_axes_0 = const()[name = string("op_649_axes_0"), val = tensor([-1])]; tensor model_model_layers_10_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_10_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(264973056)))]; tensor var_649_cast_fp16 = layer_norm(axes = var_649_axes_0, epsilon = var_50_to_fp16, gamma = model_model_layers_10_input_layernorm_weight_to_fp16, x = input_29_cast_fp16)[name = string("op_649_cast_fp16")]; tensor var_652 = const()[name = string("op_652"), val = tensor([0, 2, 1])]; tensor var_654_axes_0 = const()[name = string("op_654_axes_0"), val = tensor([2])]; tensor var_653 = transpose(perm = var_652, x = var_649_cast_fp16)[name = string("transpose_23")]; tensor var_654 = expand_dims(axes = var_654_axes_0, x = var_653)[name = string("op_654")]; string var_661_pad_type_0 = const()[name = string("op_661_pad_type_0"), val = string("valid")]; tensor var_661_strides_0 = const()[name = string("op_661_strides_0"), val = tensor([1, 1])]; tensor var_661_pad_0 = const()[name = string("op_661_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_661_dilations_0 = const()[name = string("op_661_dilations_0"), val = tensor([1, 1])]; int32 var_661_groups_0 = const()[name = string("op_661_groups_0"), val = int32(1)]; tensor var_661 = conv(dilations = var_661_dilations_0, groups = var_661_groups_0, pad = var_661_pad_0, pad_type = var_661_pad_type_0, strides = var_661_strides_0, weight = model_model_layers_10_self_attn_q_proj_weight_palettized, x = var_654)[name = string("op_661")]; tensor var_662 = const()[name = string("op_662"), val = tensor([1, 32, 1, 64])]; tensor var_663 = reshape(shape = var_662, x = var_661)[name = string("op_663")]; string var_670_pad_type_0 = const()[name = string("op_670_pad_type_0"), val = string("valid")]; tensor var_670_strides_0 = const()[name = string("op_670_strides_0"), val = tensor([1, 1])]; tensor var_670_pad_0 = const()[name = string("op_670_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_670_dilations_0 = const()[name = string("op_670_dilations_0"), val = tensor([1, 1])]; int32 var_670_groups_0 = const()[name = string("op_670_groups_0"), val = int32(1)]; tensor var_670 = conv(dilations = var_670_dilations_0, groups = var_670_groups_0, pad = var_670_pad_0, pad_type = var_670_pad_type_0, strides = var_670_strides_0, weight = model_model_layers_10_self_attn_k_proj_weight_palettized, x = var_654)[name = string("op_670")]; tensor var_671 = const()[name = string("op_671"), val = tensor([1, 8, 1, 64])]; tensor var_672 = reshape(shape = var_671, x = var_670)[name = string("op_672")]; string var_679_pad_type_0 = const()[name = string("op_679_pad_type_0"), val = string("valid")]; tensor var_679_strides_0 = const()[name = string("op_679_strides_0"), val = tensor([1, 1])]; tensor var_679_pad_0 = const()[name = string("op_679_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_679_dilations_0 = const()[name = string("op_679_dilations_0"), val = tensor([1, 1])]; int32 var_679_groups_0 = const()[name = string("op_679_groups_0"), val = int32(1)]; tensor var_679 = conv(dilations = var_679_dilations_0, groups = var_679_groups_0, pad = var_679_pad_0, pad_type = var_679_pad_type_0, strides = var_679_strides_0, weight = model_model_layers_10_self_attn_v_proj_weight_palettized, x = var_654)[name = string("op_679")]; tensor var_680 = const()[name = string("op_680"), val = tensor([1, 8, 1, 64])]; tensor var_681 = reshape(shape = var_680, x = var_679)[name = string("op_681")]; tensor x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor([1, 32, 1, 32])]; tensor x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_9 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = var_663)[name = string("x1_9")]; tensor x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor([1, 32, 1, 64])]; tensor x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_9 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = var_663)[name = string("x2_9")]; tensor var_695_cast_fp16 = mul(x = x1_9, y = cos_3_cast_fp16)[name = string("op_695_cast_fp16")]; tensor var_696_cast_fp16 = mul(x = x2_9, y = sin_3_cast_fp16)[name = string("op_696_cast_fp16")]; tensor var_697_cast_fp16 = sub(x = var_695_cast_fp16, y = var_696_cast_fp16)[name = string("op_697_cast_fp16")]; tensor var_698_cast_fp16 = mul(x = x2_9, y = cos_3_cast_fp16)[name = string("op_698_cast_fp16")]; tensor var_699_cast_fp16 = mul(x = x1_9, y = sin_3_cast_fp16)[name = string("op_699_cast_fp16")]; tensor var_700_cast_fp16 = add(x = var_698_cast_fp16, y = var_699_cast_fp16)[name = string("op_700_cast_fp16")]; bool rotated_9_interleave_0 = const()[name = string("rotated_9_interleave_0"), val = bool(false)]; tensor rotated_9_cast_fp16 = concat(axis = var_55, interleave = rotated_9_interleave_0, values = (var_697_cast_fp16, var_700_cast_fp16))[name = string("rotated_9_cast_fp16")]; tensor x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor([1, 8, 1, 32])]; tensor x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_11 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = var_672)[name = string("x1_11")]; tensor x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor([1, 8, 1, 64])]; tensor x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_11 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = var_672)[name = string("x2_11")]; tensor var_716_cast_fp16 = mul(x = x1_11, y = cos_3_cast_fp16)[name = string("op_716_cast_fp16")]; tensor var_717_cast_fp16 = mul(x = x2_11, y = sin_3_cast_fp16)[name = string("op_717_cast_fp16")]; tensor var_718_cast_fp16 = sub(x = var_716_cast_fp16, y = var_717_cast_fp16)[name = string("op_718_cast_fp16")]; tensor var_719_cast_fp16 = mul(x = x2_11, y = cos_3_cast_fp16)[name = string("op_719_cast_fp16")]; tensor var_720_cast_fp16 = mul(x = x1_11, y = sin_3_cast_fp16)[name = string("op_720_cast_fp16")]; tensor var_721_cast_fp16 = add(x = var_719_cast_fp16, y = var_720_cast_fp16)[name = string("op_721_cast_fp16")]; bool rotated_11_interleave_0 = const()[name = string("rotated_11_interleave_0"), val = bool(false)]; tensor rotated_11_cast_fp16 = concat(axis = var_55, interleave = rotated_11_interleave_0, values = (var_718_cast_fp16, var_721_cast_fp16))[name = string("rotated_11_cast_fp16")]; tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([10])]; tensor expand_dims_25 = const()[name = string("expand_dims_25"), val = tensor([0])]; tensor expand_dims_27 = const()[name = string("expand_dims_27"), val = tensor([0])]; tensor expand_dims_28 = const()[name = string("expand_dims_28"), val = tensor([11])]; int32 concat_18_axis_0 = const()[name = string("concat_18_axis_0"), val = int32(0)]; bool concat_18_interleave_0 = const()[name = string("concat_18_interleave_0"), val = bool(false)]; tensor concat_18 = concat(axis = concat_18_axis_0, interleave = concat_18_interleave_0, values = (expand_dims_24, expand_dims_25, current_pos, expand_dims_27))[name = string("concat_18")]; tensor concat_19_values1_0 = const()[name = string("concat_19_values1_0"), val = tensor([0])]; tensor concat_19_values3_0 = const()[name = string("concat_19_values3_0"), val = tensor([0])]; int32 concat_19_axis_0 = const()[name = string("concat_19_axis_0"), val = int32(0)]; bool concat_19_interleave_0 = const()[name = string("concat_19_interleave_0"), val = bool(false)]; tensor concat_19 = concat(axis = concat_19_axis_0, interleave = concat_19_interleave_0, values = (expand_dims_28, concat_19_values1_0, var_356, concat_19_values3_0))[name = string("concat_19")]; tensor model_model_kv_cache_0_internal_tensor_assign_5_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_18, begin_mask = model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0, end = concat_19, end_mask = model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_5_stride_0, update = rotated_11_cast_fp16, x = coreml_update_state_19)[name = string("model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_4_write_state")]; tensor coreml_update_state_20 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_4")]; tensor expand_dims_30 = const()[name = string("expand_dims_30"), val = tensor([26])]; tensor expand_dims_31 = const()[name = string("expand_dims_31"), val = tensor([0])]; tensor expand_dims_33 = const()[name = string("expand_dims_33"), val = tensor([0])]; tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([27])]; int32 concat_22_axis_0 = const()[name = string("concat_22_axis_0"), val = int32(0)]; bool concat_22_interleave_0 = const()[name = string("concat_22_interleave_0"), val = bool(false)]; tensor concat_22 = concat(axis = concat_22_axis_0, interleave = concat_22_interleave_0, values = (expand_dims_30, expand_dims_31, current_pos, expand_dims_33))[name = string("concat_22")]; tensor concat_23_values1_0 = const()[name = string("concat_23_values1_0"), val = tensor([0])]; tensor concat_23_values3_0 = const()[name = string("concat_23_values3_0"), val = tensor([0])]; int32 concat_23_axis_0 = const()[name = string("concat_23_axis_0"), val = int32(0)]; bool concat_23_interleave_0 = const()[name = string("concat_23_interleave_0"), val = bool(false)]; tensor concat_23 = concat(axis = concat_23_axis_0, interleave = concat_23_interleave_0, values = (expand_dims_34, concat_23_values1_0, var_356, concat_23_values3_0))[name = string("concat_23")]; tensor model_model_kv_cache_0_internal_tensor_assign_6_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_22, begin_mask = model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0, end = concat_23, end_mask = model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_6_stride_0, update = var_681, x = coreml_update_state_20)[name = string("model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_5_write_state")]; tensor coreml_update_state_21 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_5")]; tensor var_741_begin_0 = const()[name = string("op_741_begin_0"), val = tensor([10, 0, 0, 0])]; tensor var_741_end_0 = const()[name = string("op_741_end_0"), val = tensor([11, 8, 1546, 64])]; tensor var_741_end_mask_0 = const()[name = string("op_741_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_741_cast_fp16 = slice_by_index(begin = var_741_begin_0, end = var_741_end_0, end_mask = var_741_end_mask_0, x = coreml_update_state_21)[name = string("op_741_cast_fp16")]; tensor K_layer_cache_5_axes_0 = const()[name = string("K_layer_cache_5_axes_0"), val = tensor([0])]; tensor K_layer_cache_5_cast_fp16 = squeeze(axes = K_layer_cache_5_axes_0, x = var_741_cast_fp16)[name = string("K_layer_cache_5_cast_fp16")]; tensor var_743_begin_0 = const()[name = string("op_743_begin_0"), val = tensor([26, 0, 0, 0])]; tensor var_743_end_0 = const()[name = string("op_743_end_0"), val = tensor([27, 8, 1546, 64])]; tensor var_743_end_mask_0 = const()[name = string("op_743_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_743_cast_fp16 = slice_by_index(begin = var_743_begin_0, end = var_743_end_0, end_mask = var_743_end_mask_0, x = coreml_update_state_21)[name = string("op_743_cast_fp16")]; tensor V_layer_cache_5_axes_0 = const()[name = string("V_layer_cache_5_axes_0"), val = tensor([0])]; tensor V_layer_cache_5_cast_fp16 = squeeze(axes = V_layer_cache_5_axes_0, x = var_743_cast_fp16)[name = string("V_layer_cache_5_cast_fp16")]; tensor x_67_axes_0 = const()[name = string("x_67_axes_0"), val = tensor([1])]; tensor x_67_cast_fp16 = expand_dims(axes = x_67_axes_0, x = K_layer_cache_5_cast_fp16)[name = string("x_67_cast_fp16")]; tensor var_752 = const()[name = string("op_752"), val = tensor([1, 4, 1, 1])]; tensor x_69_cast_fp16 = tile(reps = var_752, x = x_67_cast_fp16)[name = string("x_69_cast_fp16")]; tensor var_756 = const()[name = string("op_756"), val = tensor([1, -1, 1546, 64])]; tensor key_states_11_cast_fp16 = reshape(shape = var_756, x = x_69_cast_fp16)[name = string("key_states_11_cast_fp16")]; tensor x_73_axes_0 = const()[name = string("x_73_axes_0"), val = tensor([1])]; tensor x_73_cast_fp16 = expand_dims(axes = x_73_axes_0, x = V_layer_cache_5_cast_fp16)[name = string("x_73_cast_fp16")]; tensor var_759 = const()[name = string("op_759"), val = tensor([1, 4, 1, 1])]; tensor x_75_cast_fp16 = tile(reps = var_759, x = x_73_cast_fp16)[name = string("x_75_cast_fp16")]; tensor var_763 = const()[name = string("op_763"), val = tensor([1, -1, 1546, 64])]; tensor value_states_11_cast_fp16 = reshape(shape = var_763, x = x_75_cast_fp16)[name = string("value_states_11_cast_fp16")]; bool var_766_transpose_x_1 = const()[name = string("op_766_transpose_x_1"), val = bool(false)]; bool var_766_transpose_y_1 = const()[name = string("op_766_transpose_y_1"), val = bool(true)]; tensor var_766_cast_fp16 = matmul(transpose_x = var_766_transpose_x_1, transpose_y = var_766_transpose_y_1, x = rotated_9_cast_fp16, y = key_states_11_cast_fp16)[name = string("op_766_cast_fp16")]; fp16 var_767_to_fp16 = const()[name = string("op_767_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_9_cast_fp16 = mul(x = var_766_cast_fp16, y = var_767_to_fp16)[name = string("attn_weights_9_cast_fp16")]; tensor x_77_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = causal_mask)[name = string("x_77_cast_fp16")]; tensor reduce_max_2_axes_0 = const()[name = string("reduce_max_2_axes_0"), val = tensor([-1])]; bool reduce_max_2_keep_dims_0 = const()[name = string("reduce_max_2_keep_dims_0"), val = bool(true)]; tensor reduce_max_2_cast_fp16 = reduce_max(axes = reduce_max_2_axes_0, keep_dims = reduce_max_2_keep_dims_0, x = x_77_cast_fp16)[name = string("reduce_max_2_cast_fp16")]; tensor x_79_cast_fp16 = sub(x = x_77_cast_fp16, y = reduce_max_2_cast_fp16)[name = string("x_79_cast_fp16")]; tensor exp_x_5_cast_fp16 = exp(x = x_79_cast_fp16)[name = string("exp_x_5_cast_fp16")]; tensor var_778_axes_0 = const()[name = string("op_778_axes_0"), val = tensor([-1])]; bool var_778_keep_dims_0 = const()[name = string("op_778_keep_dims_0"), val = bool(true)]; tensor var_778_cast_fp16 = reduce_sum(axes = var_778_axes_0, keep_dims = var_778_keep_dims_0, x = exp_x_5_cast_fp16)[name = string("op_778_cast_fp16")]; tensor attn_weights_11_cast_fp16 = real_div(x = exp_x_5_cast_fp16, y = var_778_cast_fp16)[name = string("attn_weights_11_cast_fp16")]; bool attn_output_13_transpose_x_0 = const()[name = string("attn_output_13_transpose_x_0"), val = bool(false)]; bool attn_output_13_transpose_y_0 = const()[name = string("attn_output_13_transpose_y_0"), val = bool(false)]; tensor attn_output_13_cast_fp16 = matmul(transpose_x = attn_output_13_transpose_x_0, transpose_y = attn_output_13_transpose_y_0, x = attn_weights_11_cast_fp16, y = value_states_11_cast_fp16)[name = string("attn_output_13_cast_fp16")]; tensor var_781_perm_0 = const()[name = string("op_781_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_783 = const()[name = string("op_783"), val = tensor([1, 1, 2048])]; tensor var_781_cast_fp16 = transpose(perm = var_781_perm_0, x = attn_output_13_cast_fp16)[name = string("transpose_22")]; tensor input_33_cast_fp16 = reshape(shape = var_783, x = var_781_cast_fp16)[name = string("input_33_cast_fp16")]; tensor model_model_layers_10_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(264977216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267074432))))[name = string("model_model_layers_10_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_2_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_10_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_33_cast_fp16)[name = string("linear_2_cast_fp16")]; tensor hidden_states_21_cast_fp16 = add(x = hidden_states_17_cast_fp16, y = linear_2_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; tensor mean_11_axes_0 = const()[name = string("mean_11_axes_0"), val = tensor([-1])]; bool mean_11_keep_dims_0 = const()[name = string("mean_11_keep_dims_0"), val = bool(true)]; tensor mean_11_cast_fp16 = reduce_mean(axes = mean_11_axes_0, keep_dims = mean_11_keep_dims_0, x = hidden_states_21_cast_fp16)[name = string("mean_11_cast_fp16")]; tensor input_35_cast_fp16 = sub(x = hidden_states_21_cast_fp16, y = mean_11_cast_fp16)[name = string("input_35_cast_fp16")]; tensor var_794_axes_0 = const()[name = string("op_794_axes_0"), val = tensor([-1])]; tensor model_model_layers_10_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_10_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267082688)))]; tensor var_794_cast_fp16 = layer_norm(axes = var_794_axes_0, epsilon = var_50_to_fp16, gamma = model_model_layers_10_post_attention_layernorm_weight_to_fp16, x = input_35_cast_fp16)[name = string("op_794_cast_fp16")]; tensor var_801 = const()[name = string("op_801"), val = tensor([0, 2, 1])]; tensor input_37_axes_0 = const()[name = string("input_37_axes_0"), val = tensor([2])]; tensor var_802 = transpose(perm = var_801, x = var_794_cast_fp16)[name = string("transpose_21")]; tensor input_37 = expand_dims(axes = input_37_axes_0, x = var_802)[name = string("input_37")]; string input_39_pad_type_0 = const()[name = string("input_39_pad_type_0"), val = string("valid")]; tensor input_39_strides_0 = const()[name = string("input_39_strides_0"), val = tensor([1, 1])]; tensor input_39_pad_0 = const()[name = string("input_39_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_39_dilations_0 = const()[name = string("input_39_dilations_0"), val = tensor([1, 1])]; int32 input_39_groups_0 = const()[name = string("input_39_groups_0"), val = int32(1)]; tensor input_39 = conv(dilations = input_39_dilations_0, groups = input_39_groups_0, pad = input_39_pad_0, pad_type = input_39_pad_type_0, strides = input_39_strides_0, weight = model_model_layers_10_mlp_gate_proj_weight_palettized, x = input_37)[name = string("input_39")]; string up_states_5_pad_type_0 = const()[name = string("up_states_5_pad_type_0"), val = string("valid")]; tensor up_states_5_strides_0 = const()[name = string("up_states_5_strides_0"), val = tensor([1, 1])]; tensor up_states_5_pad_0 = const()[name = string("up_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_5_dilations_0 = const()[name = string("up_states_5_dilations_0"), val = tensor([1, 1])]; int32 up_states_5_groups_0 = const()[name = string("up_states_5_groups_0"), val = int32(1)]; tensor up_states_5 = conv(dilations = up_states_5_dilations_0, groups = up_states_5_groups_0, pad = up_states_5_pad_0, pad_type = up_states_5_pad_type_0, strides = up_states_5_strides_0, weight = model_model_layers_10_mlp_up_proj_weight_palettized, x = input_37)[name = string("up_states_5")]; tensor gate_states_5 = silu(x = input_39)[name = string("gate_states_5")]; tensor input_41 = mul(x = gate_states_5, y = up_states_5)[name = string("input_41")]; string hidden_states_23_pad_type_0 = const()[name = string("hidden_states_23_pad_type_0"), val = string("valid")]; tensor hidden_states_23_strides_0 = const()[name = string("hidden_states_23_strides_0"), val = tensor([1, 1])]; tensor hidden_states_23_pad_0 = const()[name = string("hidden_states_23_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_23_dilations_0 = const()[name = string("hidden_states_23_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_23_groups_0 = const()[name = string("hidden_states_23_groups_0"), val = int32(1)]; tensor hidden_states_23 = conv(dilations = hidden_states_23_dilations_0, groups = hidden_states_23_groups_0, pad = hidden_states_23_pad_0, pad_type = hidden_states_23_pad_type_0, strides = hidden_states_23_strides_0, weight = model_model_layers_10_mlp_down_proj_weight_palettized, x = input_41)[name = string("hidden_states_23")]; tensor var_824_axes_0 = const()[name = string("op_824_axes_0"), val = tensor([2])]; tensor var_824 = squeeze(axes = var_824_axes_0, x = hidden_states_23)[name = string("op_824")]; tensor var_825 = const()[name = string("op_825"), val = tensor([0, 2, 1])]; tensor var_826 = transpose(perm = var_825, x = var_824)[name = string("transpose_20")]; tensor hidden_states_25_cast_fp16 = add(x = hidden_states_21_cast_fp16, y = var_826)[name = string("hidden_states_25_cast_fp16")]; tensor mean_13_axes_0 = const()[name = string("mean_13_axes_0"), val = tensor([-1])]; bool mean_13_keep_dims_0 = const()[name = string("mean_13_keep_dims_0"), val = bool(true)]; tensor mean_13_cast_fp16 = reduce_mean(axes = mean_13_axes_0, keep_dims = mean_13_keep_dims_0, x = hidden_states_25_cast_fp16)[name = string("mean_13_cast_fp16")]; tensor input_43_cast_fp16 = sub(x = hidden_states_25_cast_fp16, y = mean_13_cast_fp16)[name = string("input_43_cast_fp16")]; tensor var_834_axes_0 = const()[name = string("op_834_axes_0"), val = tensor([-1])]; tensor model_model_layers_11_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_11_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267086848)))]; tensor var_834_cast_fp16 = layer_norm(axes = var_834_axes_0, epsilon = var_50_to_fp16, gamma = model_model_layers_11_input_layernorm_weight_to_fp16, x = input_43_cast_fp16)[name = string("op_834_cast_fp16")]; tensor var_837 = const()[name = string("op_837"), val = tensor([0, 2, 1])]; tensor var_839_axes_0 = const()[name = string("op_839_axes_0"), val = tensor([2])]; tensor var_838 = transpose(perm = var_837, x = var_834_cast_fp16)[name = string("transpose_19")]; tensor var_839 = expand_dims(axes = var_839_axes_0, x = var_838)[name = string("op_839")]; string var_846_pad_type_0 = const()[name = string("op_846_pad_type_0"), val = string("valid")]; tensor var_846_strides_0 = const()[name = string("op_846_strides_0"), val = tensor([1, 1])]; tensor var_846_pad_0 = const()[name = string("op_846_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_846_dilations_0 = const()[name = string("op_846_dilations_0"), val = tensor([1, 1])]; int32 var_846_groups_0 = const()[name = string("op_846_groups_0"), val = int32(1)]; tensor var_846 = conv(dilations = var_846_dilations_0, groups = var_846_groups_0, pad = var_846_pad_0, pad_type = var_846_pad_type_0, strides = var_846_strides_0, weight = model_model_layers_11_self_attn_q_proj_weight_palettized, x = var_839)[name = string("op_846")]; tensor var_847 = const()[name = string("op_847"), val = tensor([1, 32, 1, 64])]; tensor var_848 = reshape(shape = var_847, x = var_846)[name = string("op_848")]; string var_855_pad_type_0 = const()[name = string("op_855_pad_type_0"), val = string("valid")]; tensor var_855_strides_0 = const()[name = string("op_855_strides_0"), val = tensor([1, 1])]; tensor var_855_pad_0 = const()[name = string("op_855_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_855_dilations_0 = const()[name = string("op_855_dilations_0"), val = tensor([1, 1])]; int32 var_855_groups_0 = const()[name = string("op_855_groups_0"), val = int32(1)]; tensor var_855 = conv(dilations = var_855_dilations_0, groups = var_855_groups_0, pad = var_855_pad_0, pad_type = var_855_pad_type_0, strides = var_855_strides_0, weight = model_model_layers_11_self_attn_k_proj_weight_palettized, x = var_839)[name = string("op_855")]; tensor var_856 = const()[name = string("op_856"), val = tensor([1, 8, 1, 64])]; tensor var_857 = reshape(shape = var_856, x = var_855)[name = string("op_857")]; string var_864_pad_type_0 = const()[name = string("op_864_pad_type_0"), val = string("valid")]; tensor var_864_strides_0 = const()[name = string("op_864_strides_0"), val = tensor([1, 1])]; tensor var_864_pad_0 = const()[name = string("op_864_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_864_dilations_0 = const()[name = string("op_864_dilations_0"), val = tensor([1, 1])]; int32 var_864_groups_0 = const()[name = string("op_864_groups_0"), val = int32(1)]; tensor var_864 = conv(dilations = var_864_dilations_0, groups = var_864_groups_0, pad = var_864_pad_0, pad_type = var_864_pad_type_0, strides = var_864_strides_0, weight = model_model_layers_11_self_attn_v_proj_weight_palettized, x = var_839)[name = string("op_864")]; tensor var_865 = const()[name = string("op_865"), val = tensor([1, 8, 1, 64])]; tensor var_866 = reshape(shape = var_865, x = var_864)[name = string("op_866")]; tensor x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor([1, 32, 1, 32])]; tensor x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_13 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = var_848)[name = string("x1_13")]; tensor x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor([1, 32, 1, 64])]; tensor x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_13 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = var_848)[name = string("x2_13")]; tensor var_880_cast_fp16 = mul(x = x1_13, y = cos_3_cast_fp16)[name = string("op_880_cast_fp16")]; tensor var_881_cast_fp16 = mul(x = x2_13, y = sin_3_cast_fp16)[name = string("op_881_cast_fp16")]; tensor var_882_cast_fp16 = sub(x = var_880_cast_fp16, y = var_881_cast_fp16)[name = string("op_882_cast_fp16")]; tensor var_883_cast_fp16 = mul(x = x2_13, y = cos_3_cast_fp16)[name = string("op_883_cast_fp16")]; tensor var_884_cast_fp16 = mul(x = x1_13, y = sin_3_cast_fp16)[name = string("op_884_cast_fp16")]; tensor var_885_cast_fp16 = add(x = var_883_cast_fp16, y = var_884_cast_fp16)[name = string("op_885_cast_fp16")]; bool rotated_13_interleave_0 = const()[name = string("rotated_13_interleave_0"), val = bool(false)]; tensor rotated_13_cast_fp16 = concat(axis = var_55, interleave = rotated_13_interleave_0, values = (var_882_cast_fp16, var_885_cast_fp16))[name = string("rotated_13_cast_fp16")]; tensor x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor([1, 8, 1, 32])]; tensor x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_15 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = var_857)[name = string("x1_15")]; tensor x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor([1, 8, 1, 64])]; tensor x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_15 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = var_857)[name = string("x2_15")]; tensor var_901_cast_fp16 = mul(x = x1_15, y = cos_3_cast_fp16)[name = string("op_901_cast_fp16")]; tensor var_902_cast_fp16 = mul(x = x2_15, y = sin_3_cast_fp16)[name = string("op_902_cast_fp16")]; tensor var_903_cast_fp16 = sub(x = var_901_cast_fp16, y = var_902_cast_fp16)[name = string("op_903_cast_fp16")]; tensor var_904_cast_fp16 = mul(x = x2_15, y = cos_3_cast_fp16)[name = string("op_904_cast_fp16")]; tensor var_905_cast_fp16 = mul(x = x1_15, y = sin_3_cast_fp16)[name = string("op_905_cast_fp16")]; tensor var_906_cast_fp16 = add(x = var_904_cast_fp16, y = var_905_cast_fp16)[name = string("op_906_cast_fp16")]; bool rotated_15_interleave_0 = const()[name = string("rotated_15_interleave_0"), val = bool(false)]; tensor rotated_15_cast_fp16 = concat(axis = var_55, interleave = rotated_15_interleave_0, values = (var_903_cast_fp16, var_906_cast_fp16))[name = string("rotated_15_cast_fp16")]; tensor expand_dims_36 = const()[name = string("expand_dims_36"), val = tensor([11])]; tensor expand_dims_37 = const()[name = string("expand_dims_37"), val = tensor([0])]; tensor expand_dims_39 = const()[name = string("expand_dims_39"), val = tensor([0])]; tensor expand_dims_40 = const()[name = string("expand_dims_40"), val = tensor([12])]; int32 concat_26_axis_0 = const()[name = string("concat_26_axis_0"), val = int32(0)]; bool concat_26_interleave_0 = const()[name = string("concat_26_interleave_0"), val = bool(false)]; tensor concat_26 = concat(axis = concat_26_axis_0, interleave = concat_26_interleave_0, values = (expand_dims_36, expand_dims_37, current_pos, expand_dims_39))[name = string("concat_26")]; tensor concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor([0])]; tensor concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor([0])]; int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)]; bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)]; tensor concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (expand_dims_40, concat_27_values1_0, var_356, concat_27_values3_0))[name = string("concat_27")]; tensor model_model_kv_cache_0_internal_tensor_assign_7_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_26, begin_mask = model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0, end = concat_27, end_mask = model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_7_stride_0, update = rotated_15_cast_fp16, x = coreml_update_state_21)[name = string("model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_6_write_state")]; tensor coreml_update_state_22 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_6")]; tensor expand_dims_42 = const()[name = string("expand_dims_42"), val = tensor([27])]; tensor expand_dims_43 = const()[name = string("expand_dims_43"), val = tensor([0])]; tensor expand_dims_45 = const()[name = string("expand_dims_45"), val = tensor([0])]; tensor expand_dims_46 = const()[name = string("expand_dims_46"), val = tensor([28])]; int32 concat_30_axis_0 = const()[name = string("concat_30_axis_0"), val = int32(0)]; bool concat_30_interleave_0 = const()[name = string("concat_30_interleave_0"), val = bool(false)]; tensor concat_30 = concat(axis = concat_30_axis_0, interleave = concat_30_interleave_0, values = (expand_dims_42, expand_dims_43, current_pos, expand_dims_45))[name = string("concat_30")]; tensor concat_31_values1_0 = const()[name = string("concat_31_values1_0"), val = tensor([0])]; tensor concat_31_values3_0 = const()[name = string("concat_31_values3_0"), val = tensor([0])]; int32 concat_31_axis_0 = const()[name = string("concat_31_axis_0"), val = int32(0)]; bool concat_31_interleave_0 = const()[name = string("concat_31_interleave_0"), val = bool(false)]; tensor concat_31 = concat(axis = concat_31_axis_0, interleave = concat_31_interleave_0, values = (expand_dims_46, concat_31_values1_0, var_356, concat_31_values3_0))[name = string("concat_31")]; tensor model_model_kv_cache_0_internal_tensor_assign_8_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_30, begin_mask = model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0, end = concat_31, end_mask = model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_8_stride_0, update = var_866, x = coreml_update_state_22)[name = string("model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_7_write_state")]; tensor coreml_update_state_23 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_7")]; tensor var_926_begin_0 = const()[name = string("op_926_begin_0"), val = tensor([11, 0, 0, 0])]; tensor var_926_end_0 = const()[name = string("op_926_end_0"), val = tensor([12, 8, 1546, 64])]; tensor var_926_end_mask_0 = const()[name = string("op_926_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_926_cast_fp16 = slice_by_index(begin = var_926_begin_0, end = var_926_end_0, end_mask = var_926_end_mask_0, x = coreml_update_state_23)[name = string("op_926_cast_fp16")]; tensor K_layer_cache_7_axes_0 = const()[name = string("K_layer_cache_7_axes_0"), val = tensor([0])]; tensor K_layer_cache_7_cast_fp16 = squeeze(axes = K_layer_cache_7_axes_0, x = var_926_cast_fp16)[name = string("K_layer_cache_7_cast_fp16")]; tensor var_928_begin_0 = const()[name = string("op_928_begin_0"), val = tensor([27, 0, 0, 0])]; tensor var_928_end_0 = const()[name = string("op_928_end_0"), val = tensor([28, 8, 1546, 64])]; tensor var_928_end_mask_0 = const()[name = string("op_928_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_928_cast_fp16 = slice_by_index(begin = var_928_begin_0, end = var_928_end_0, end_mask = var_928_end_mask_0, x = coreml_update_state_23)[name = string("op_928_cast_fp16")]; tensor V_layer_cache_7_axes_0 = const()[name = string("V_layer_cache_7_axes_0"), val = tensor([0])]; tensor V_layer_cache_7_cast_fp16 = squeeze(axes = V_layer_cache_7_axes_0, x = var_928_cast_fp16)[name = string("V_layer_cache_7_cast_fp16")]; tensor x_95_axes_0 = const()[name = string("x_95_axes_0"), val = tensor([1])]; tensor x_95_cast_fp16 = expand_dims(axes = x_95_axes_0, x = K_layer_cache_7_cast_fp16)[name = string("x_95_cast_fp16")]; tensor var_937 = const()[name = string("op_937"), val = tensor([1, 4, 1, 1])]; tensor x_97_cast_fp16 = tile(reps = var_937, x = x_95_cast_fp16)[name = string("x_97_cast_fp16")]; tensor var_941 = const()[name = string("op_941"), val = tensor([1, -1, 1546, 64])]; tensor key_states_15_cast_fp16 = reshape(shape = var_941, x = x_97_cast_fp16)[name = string("key_states_15_cast_fp16")]; tensor x_101_axes_0 = const()[name = string("x_101_axes_0"), val = tensor([1])]; tensor x_101_cast_fp16 = expand_dims(axes = x_101_axes_0, x = V_layer_cache_7_cast_fp16)[name = string("x_101_cast_fp16")]; tensor var_944 = const()[name = string("op_944"), val = tensor([1, 4, 1, 1])]; tensor x_103_cast_fp16 = tile(reps = var_944, x = x_101_cast_fp16)[name = string("x_103_cast_fp16")]; tensor var_948 = const()[name = string("op_948"), val = tensor([1, -1, 1546, 64])]; tensor value_states_15_cast_fp16 = reshape(shape = var_948, x = x_103_cast_fp16)[name = string("value_states_15_cast_fp16")]; bool var_951_transpose_x_1 = const()[name = string("op_951_transpose_x_1"), val = bool(false)]; bool var_951_transpose_y_1 = const()[name = string("op_951_transpose_y_1"), val = bool(true)]; tensor var_951_cast_fp16 = matmul(transpose_x = var_951_transpose_x_1, transpose_y = var_951_transpose_y_1, x = rotated_13_cast_fp16, y = key_states_15_cast_fp16)[name = string("op_951_cast_fp16")]; fp16 var_952_to_fp16 = const()[name = string("op_952_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_13_cast_fp16 = mul(x = var_951_cast_fp16, y = var_952_to_fp16)[name = string("attn_weights_13_cast_fp16")]; tensor x_105_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = causal_mask)[name = string("x_105_cast_fp16")]; tensor reduce_max_3_axes_0 = const()[name = string("reduce_max_3_axes_0"), val = tensor([-1])]; bool reduce_max_3_keep_dims_0 = const()[name = string("reduce_max_3_keep_dims_0"), val = bool(true)]; tensor reduce_max_3_cast_fp16 = reduce_max(axes = reduce_max_3_axes_0, keep_dims = reduce_max_3_keep_dims_0, x = x_105_cast_fp16)[name = string("reduce_max_3_cast_fp16")]; tensor x_107_cast_fp16 = sub(x = x_105_cast_fp16, y = reduce_max_3_cast_fp16)[name = string("x_107_cast_fp16")]; tensor exp_x_7_cast_fp16 = exp(x = x_107_cast_fp16)[name = string("exp_x_7_cast_fp16")]; tensor var_963_axes_0 = const()[name = string("op_963_axes_0"), val = tensor([-1])]; bool var_963_keep_dims_0 = const()[name = string("op_963_keep_dims_0"), val = bool(true)]; tensor var_963_cast_fp16 = reduce_sum(axes = var_963_axes_0, keep_dims = var_963_keep_dims_0, x = exp_x_7_cast_fp16)[name = string("op_963_cast_fp16")]; tensor attn_weights_15_cast_fp16 = real_div(x = exp_x_7_cast_fp16, y = var_963_cast_fp16)[name = string("attn_weights_15_cast_fp16")]; bool attn_output_19_transpose_x_0 = const()[name = string("attn_output_19_transpose_x_0"), val = bool(false)]; bool attn_output_19_transpose_y_0 = const()[name = string("attn_output_19_transpose_y_0"), val = bool(false)]; tensor attn_output_19_cast_fp16 = matmul(transpose_x = attn_output_19_transpose_x_0, transpose_y = attn_output_19_transpose_y_0, x = attn_weights_15_cast_fp16, y = value_states_15_cast_fp16)[name = string("attn_output_19_cast_fp16")]; tensor var_966_perm_0 = const()[name = string("op_966_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_968 = const()[name = string("op_968"), val = tensor([1, 1, 2048])]; tensor var_966_cast_fp16 = transpose(perm = var_966_perm_0, x = attn_output_19_cast_fp16)[name = string("transpose_18")]; tensor input_47_cast_fp16 = reshape(shape = var_968, x = var_966_cast_fp16)[name = string("input_47_cast_fp16")]; tensor model_model_layers_11_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267091008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269188224))))[name = string("model_model_layers_11_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_3_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_11_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_47_cast_fp16)[name = string("linear_3_cast_fp16")]; tensor hidden_states_29_cast_fp16 = add(x = hidden_states_25_cast_fp16, y = linear_3_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; tensor mean_15_axes_0 = const()[name = string("mean_15_axes_0"), val = tensor([-1])]; bool mean_15_keep_dims_0 = const()[name = string("mean_15_keep_dims_0"), val = bool(true)]; tensor mean_15_cast_fp16 = reduce_mean(axes = mean_15_axes_0, keep_dims = mean_15_keep_dims_0, x = hidden_states_29_cast_fp16)[name = string("mean_15_cast_fp16")]; tensor input_49_cast_fp16 = sub(x = hidden_states_29_cast_fp16, y = mean_15_cast_fp16)[name = string("input_49_cast_fp16")]; tensor var_979_axes_0 = const()[name = string("op_979_axes_0"), val = tensor([-1])]; tensor model_model_layers_11_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_11_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269196480)))]; tensor var_979_cast_fp16 = layer_norm(axes = var_979_axes_0, epsilon = var_50_to_fp16, gamma = model_model_layers_11_post_attention_layernorm_weight_to_fp16, x = input_49_cast_fp16)[name = string("op_979_cast_fp16")]; tensor var_986 = const()[name = string("op_986"), val = tensor([0, 2, 1])]; tensor input_51_axes_0 = const()[name = string("input_51_axes_0"), val = tensor([2])]; tensor var_987 = transpose(perm = var_986, x = var_979_cast_fp16)[name = string("transpose_17")]; tensor input_51 = expand_dims(axes = input_51_axes_0, x = var_987)[name = string("input_51")]; string input_53_pad_type_0 = const()[name = string("input_53_pad_type_0"), val = string("valid")]; tensor input_53_strides_0 = const()[name = string("input_53_strides_0"), val = tensor([1, 1])]; tensor input_53_pad_0 = const()[name = string("input_53_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_53_dilations_0 = const()[name = string("input_53_dilations_0"), val = tensor([1, 1])]; int32 input_53_groups_0 = const()[name = string("input_53_groups_0"), val = int32(1)]; tensor input_53 = conv(dilations = input_53_dilations_0, groups = input_53_groups_0, pad = input_53_pad_0, pad_type = input_53_pad_type_0, strides = input_53_strides_0, weight = model_model_layers_11_mlp_gate_proj_weight_palettized, x = input_51)[name = string("input_53")]; string up_states_7_pad_type_0 = const()[name = string("up_states_7_pad_type_0"), val = string("valid")]; tensor up_states_7_strides_0 = const()[name = string("up_states_7_strides_0"), val = tensor([1, 1])]; tensor up_states_7_pad_0 = const()[name = string("up_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_7_dilations_0 = const()[name = string("up_states_7_dilations_0"), val = tensor([1, 1])]; int32 up_states_7_groups_0 = const()[name = string("up_states_7_groups_0"), val = int32(1)]; tensor up_states_7 = conv(dilations = up_states_7_dilations_0, groups = up_states_7_groups_0, pad = up_states_7_pad_0, pad_type = up_states_7_pad_type_0, strides = up_states_7_strides_0, weight = model_model_layers_11_mlp_up_proj_weight_palettized, x = input_51)[name = string("up_states_7")]; tensor gate_states_7 = silu(x = input_53)[name = string("gate_states_7")]; tensor input_55 = mul(x = gate_states_7, y = up_states_7)[name = string("input_55")]; string hidden_states_31_pad_type_0 = const()[name = string("hidden_states_31_pad_type_0"), val = string("valid")]; tensor hidden_states_31_strides_0 = const()[name = string("hidden_states_31_strides_0"), val = tensor([1, 1])]; tensor hidden_states_31_pad_0 = const()[name = string("hidden_states_31_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_31_dilations_0 = const()[name = string("hidden_states_31_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_31_groups_0 = const()[name = string("hidden_states_31_groups_0"), val = int32(1)]; tensor hidden_states_31 = conv(dilations = hidden_states_31_dilations_0, groups = hidden_states_31_groups_0, pad = hidden_states_31_pad_0, pad_type = hidden_states_31_pad_type_0, strides = hidden_states_31_strides_0, weight = model_model_layers_11_mlp_down_proj_weight_palettized, x = input_55)[name = string("hidden_states_31")]; tensor var_1009_axes_0 = const()[name = string("op_1009_axes_0"), val = tensor([2])]; tensor var_1009 = squeeze(axes = var_1009_axes_0, x = hidden_states_31)[name = string("op_1009")]; tensor var_1010 = const()[name = string("op_1010"), val = tensor([0, 2, 1])]; tensor var_1011 = transpose(perm = var_1010, x = var_1009)[name = string("transpose_16")]; tensor hidden_states_33_cast_fp16 = add(x = hidden_states_29_cast_fp16, y = var_1011)[name = string("hidden_states_33_cast_fp16")]; tensor mean_17_axes_0 = const()[name = string("mean_17_axes_0"), val = tensor([-1])]; bool mean_17_keep_dims_0 = const()[name = string("mean_17_keep_dims_0"), val = bool(true)]; tensor mean_17_cast_fp16 = reduce_mean(axes = mean_17_axes_0, keep_dims = mean_17_keep_dims_0, x = hidden_states_33_cast_fp16)[name = string("mean_17_cast_fp16")]; tensor input_57_cast_fp16 = sub(x = hidden_states_33_cast_fp16, y = mean_17_cast_fp16)[name = string("input_57_cast_fp16")]; tensor var_1019_axes_0 = const()[name = string("op_1019_axes_0"), val = tensor([-1])]; tensor model_model_layers_12_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_12_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269200640)))]; tensor var_1019_cast_fp16 = layer_norm(axes = var_1019_axes_0, epsilon = var_50_to_fp16, gamma = model_model_layers_12_input_layernorm_weight_to_fp16, x = input_57_cast_fp16)[name = string("op_1019_cast_fp16")]; tensor var_1022 = const()[name = string("op_1022"), val = tensor([0, 2, 1])]; tensor var_1024_axes_0 = const()[name = string("op_1024_axes_0"), val = tensor([2])]; tensor var_1023 = transpose(perm = var_1022, x = var_1019_cast_fp16)[name = string("transpose_15")]; tensor var_1024 = expand_dims(axes = var_1024_axes_0, x = var_1023)[name = string("op_1024")]; string var_1031_pad_type_0 = const()[name = string("op_1031_pad_type_0"), val = string("valid")]; tensor var_1031_strides_0 = const()[name = string("op_1031_strides_0"), val = tensor([1, 1])]; tensor var_1031_pad_0 = const()[name = string("op_1031_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1031_dilations_0 = const()[name = string("op_1031_dilations_0"), val = tensor([1, 1])]; int32 var_1031_groups_0 = const()[name = string("op_1031_groups_0"), val = int32(1)]; tensor var_1031 = conv(dilations = var_1031_dilations_0, groups = var_1031_groups_0, pad = var_1031_pad_0, pad_type = var_1031_pad_type_0, strides = var_1031_strides_0, weight = model_model_layers_12_self_attn_q_proj_weight_palettized, x = var_1024)[name = string("op_1031")]; tensor var_1032 = const()[name = string("op_1032"), val = tensor([1, 32, 1, 64])]; tensor var_1033 = reshape(shape = var_1032, x = var_1031)[name = string("op_1033")]; string var_1040_pad_type_0 = const()[name = string("op_1040_pad_type_0"), val = string("valid")]; tensor var_1040_strides_0 = const()[name = string("op_1040_strides_0"), val = tensor([1, 1])]; tensor var_1040_pad_0 = const()[name = string("op_1040_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1040_dilations_0 = const()[name = string("op_1040_dilations_0"), val = tensor([1, 1])]; int32 var_1040_groups_0 = const()[name = string("op_1040_groups_0"), val = int32(1)]; tensor var_1040 = conv(dilations = var_1040_dilations_0, groups = var_1040_groups_0, pad = var_1040_pad_0, pad_type = var_1040_pad_type_0, strides = var_1040_strides_0, weight = model_model_layers_12_self_attn_k_proj_weight_palettized, x = var_1024)[name = string("op_1040")]; tensor var_1041 = const()[name = string("op_1041"), val = tensor([1, 8, 1, 64])]; tensor var_1042 = reshape(shape = var_1041, x = var_1040)[name = string("op_1042")]; string var_1049_pad_type_0 = const()[name = string("op_1049_pad_type_0"), val = string("valid")]; tensor var_1049_strides_0 = const()[name = string("op_1049_strides_0"), val = tensor([1, 1])]; tensor var_1049_pad_0 = const()[name = string("op_1049_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1049_dilations_0 = const()[name = string("op_1049_dilations_0"), val = tensor([1, 1])]; int32 var_1049_groups_0 = const()[name = string("op_1049_groups_0"), val = int32(1)]; tensor var_1049 = conv(dilations = var_1049_dilations_0, groups = var_1049_groups_0, pad = var_1049_pad_0, pad_type = var_1049_pad_type_0, strides = var_1049_strides_0, weight = model_model_layers_12_self_attn_v_proj_weight_palettized, x = var_1024)[name = string("op_1049")]; tensor var_1050 = const()[name = string("op_1050"), val = tensor([1, 8, 1, 64])]; tensor var_1051 = reshape(shape = var_1050, x = var_1049)[name = string("op_1051")]; tensor x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor([1, 32, 1, 32])]; tensor x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_17 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = var_1033)[name = string("x1_17")]; tensor x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor([1, 32, 1, 64])]; tensor x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_17 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = var_1033)[name = string("x2_17")]; tensor var_1065_cast_fp16 = mul(x = x1_17, y = cos_3_cast_fp16)[name = string("op_1065_cast_fp16")]; tensor var_1066_cast_fp16 = mul(x = x2_17, y = sin_3_cast_fp16)[name = string("op_1066_cast_fp16")]; tensor var_1067_cast_fp16 = sub(x = var_1065_cast_fp16, y = var_1066_cast_fp16)[name = string("op_1067_cast_fp16")]; tensor var_1068_cast_fp16 = mul(x = x2_17, y = cos_3_cast_fp16)[name = string("op_1068_cast_fp16")]; tensor var_1069_cast_fp16 = mul(x = x1_17, y = sin_3_cast_fp16)[name = string("op_1069_cast_fp16")]; tensor var_1070_cast_fp16 = add(x = var_1068_cast_fp16, y = var_1069_cast_fp16)[name = string("op_1070_cast_fp16")]; bool rotated_17_interleave_0 = const()[name = string("rotated_17_interleave_0"), val = bool(false)]; tensor rotated_17_cast_fp16 = concat(axis = var_55, interleave = rotated_17_interleave_0, values = (var_1067_cast_fp16, var_1070_cast_fp16))[name = string("rotated_17_cast_fp16")]; tensor x1_19_begin_0 = const()[name = string("x1_19_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_19_end_0 = const()[name = string("x1_19_end_0"), val = tensor([1, 8, 1, 32])]; tensor x1_19_end_mask_0 = const()[name = string("x1_19_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_19 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = var_1042)[name = string("x1_19")]; tensor x2_19_begin_0 = const()[name = string("x2_19_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_19_end_0 = const()[name = string("x2_19_end_0"), val = tensor([1, 8, 1, 64])]; tensor x2_19_end_mask_0 = const()[name = string("x2_19_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_19 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = var_1042)[name = string("x2_19")]; tensor var_1086_cast_fp16 = mul(x = x1_19, y = cos_3_cast_fp16)[name = string("op_1086_cast_fp16")]; tensor var_1087_cast_fp16 = mul(x = x2_19, y = sin_3_cast_fp16)[name = string("op_1087_cast_fp16")]; tensor var_1088_cast_fp16 = sub(x = var_1086_cast_fp16, y = var_1087_cast_fp16)[name = string("op_1088_cast_fp16")]; tensor var_1089_cast_fp16 = mul(x = x2_19, y = cos_3_cast_fp16)[name = string("op_1089_cast_fp16")]; tensor var_1090_cast_fp16 = mul(x = x1_19, y = sin_3_cast_fp16)[name = string("op_1090_cast_fp16")]; tensor var_1091_cast_fp16 = add(x = var_1089_cast_fp16, y = var_1090_cast_fp16)[name = string("op_1091_cast_fp16")]; bool rotated_19_interleave_0 = const()[name = string("rotated_19_interleave_0"), val = bool(false)]; tensor rotated_19_cast_fp16 = concat(axis = var_55, interleave = rotated_19_interleave_0, values = (var_1088_cast_fp16, var_1091_cast_fp16))[name = string("rotated_19_cast_fp16")]; tensor expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor([12])]; tensor expand_dims_49 = const()[name = string("expand_dims_49"), val = tensor([0])]; tensor expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor([0])]; tensor expand_dims_52 = const()[name = string("expand_dims_52"), val = tensor([13])]; int32 concat_34_axis_0 = const()[name = string("concat_34_axis_0"), val = int32(0)]; bool concat_34_interleave_0 = const()[name = string("concat_34_interleave_0"), val = bool(false)]; tensor concat_34 = concat(axis = concat_34_axis_0, interleave = concat_34_interleave_0, values = (expand_dims_48, expand_dims_49, current_pos, expand_dims_51))[name = string("concat_34")]; tensor concat_35_values1_0 = const()[name = string("concat_35_values1_0"), val = tensor([0])]; tensor concat_35_values3_0 = const()[name = string("concat_35_values3_0"), val = tensor([0])]; int32 concat_35_axis_0 = const()[name = string("concat_35_axis_0"), val = int32(0)]; bool concat_35_interleave_0 = const()[name = string("concat_35_interleave_0"), val = bool(false)]; tensor concat_35 = concat(axis = concat_35_axis_0, interleave = concat_35_interleave_0, values = (expand_dims_52, concat_35_values1_0, var_356, concat_35_values3_0))[name = string("concat_35")]; tensor model_model_kv_cache_0_internal_tensor_assign_9_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_34, begin_mask = model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0, end = concat_35, end_mask = model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_9_stride_0, update = rotated_19_cast_fp16, x = coreml_update_state_23)[name = string("model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_8_write_state")]; tensor coreml_update_state_24 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_8")]; tensor expand_dims_54 = const()[name = string("expand_dims_54"), val = tensor([28])]; tensor expand_dims_55 = const()[name = string("expand_dims_55"), val = tensor([0])]; tensor expand_dims_57 = const()[name = string("expand_dims_57"), val = tensor([0])]; tensor expand_dims_58 = const()[name = string("expand_dims_58"), val = tensor([29])]; int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)]; bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)]; tensor concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (expand_dims_54, expand_dims_55, current_pos, expand_dims_57))[name = string("concat_38")]; tensor concat_39_values1_0 = const()[name = string("concat_39_values1_0"), val = tensor([0])]; tensor concat_39_values3_0 = const()[name = string("concat_39_values3_0"), val = tensor([0])]; int32 concat_39_axis_0 = const()[name = string("concat_39_axis_0"), val = int32(0)]; bool concat_39_interleave_0 = const()[name = string("concat_39_interleave_0"), val = bool(false)]; tensor concat_39 = concat(axis = concat_39_axis_0, interleave = concat_39_interleave_0, values = (expand_dims_58, concat_39_values1_0, var_356, concat_39_values3_0))[name = string("concat_39")]; tensor model_model_kv_cache_0_internal_tensor_assign_10_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_38, begin_mask = model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0, end = concat_39, end_mask = model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_10_stride_0, update = var_1051, x = coreml_update_state_24)[name = string("model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_9_write_state")]; tensor coreml_update_state_25 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_9")]; tensor var_1111_begin_0 = const()[name = string("op_1111_begin_0"), val = tensor([12, 0, 0, 0])]; tensor var_1111_end_0 = const()[name = string("op_1111_end_0"), val = tensor([13, 8, 1546, 64])]; tensor var_1111_end_mask_0 = const()[name = string("op_1111_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1111_cast_fp16 = slice_by_index(begin = var_1111_begin_0, end = var_1111_end_0, end_mask = var_1111_end_mask_0, x = coreml_update_state_25)[name = string("op_1111_cast_fp16")]; tensor K_layer_cache_9_axes_0 = const()[name = string("K_layer_cache_9_axes_0"), val = tensor([0])]; tensor K_layer_cache_9_cast_fp16 = squeeze(axes = K_layer_cache_9_axes_0, x = var_1111_cast_fp16)[name = string("K_layer_cache_9_cast_fp16")]; tensor var_1113_begin_0 = const()[name = string("op_1113_begin_0"), val = tensor([28, 0, 0, 0])]; tensor var_1113_end_0 = const()[name = string("op_1113_end_0"), val = tensor([29, 8, 1546, 64])]; tensor var_1113_end_mask_0 = const()[name = string("op_1113_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1113_cast_fp16 = slice_by_index(begin = var_1113_begin_0, end = var_1113_end_0, end_mask = var_1113_end_mask_0, x = coreml_update_state_25)[name = string("op_1113_cast_fp16")]; tensor V_layer_cache_9_axes_0 = const()[name = string("V_layer_cache_9_axes_0"), val = tensor([0])]; tensor V_layer_cache_9_cast_fp16 = squeeze(axes = V_layer_cache_9_axes_0, x = var_1113_cast_fp16)[name = string("V_layer_cache_9_cast_fp16")]; tensor x_123_axes_0 = const()[name = string("x_123_axes_0"), val = tensor([1])]; tensor x_123_cast_fp16 = expand_dims(axes = x_123_axes_0, x = K_layer_cache_9_cast_fp16)[name = string("x_123_cast_fp16")]; tensor var_1122 = const()[name = string("op_1122"), val = tensor([1, 4, 1, 1])]; tensor x_125_cast_fp16 = tile(reps = var_1122, x = x_123_cast_fp16)[name = string("x_125_cast_fp16")]; tensor var_1126 = const()[name = string("op_1126"), val = tensor([1, -1, 1546, 64])]; tensor key_states_19_cast_fp16 = reshape(shape = var_1126, x = x_125_cast_fp16)[name = string("key_states_19_cast_fp16")]; tensor x_129_axes_0 = const()[name = string("x_129_axes_0"), val = tensor([1])]; tensor x_129_cast_fp16 = expand_dims(axes = x_129_axes_0, x = V_layer_cache_9_cast_fp16)[name = string("x_129_cast_fp16")]; tensor var_1129 = const()[name = string("op_1129"), val = tensor([1, 4, 1, 1])]; tensor x_131_cast_fp16 = tile(reps = var_1129, x = x_129_cast_fp16)[name = string("x_131_cast_fp16")]; tensor var_1133 = const()[name = string("op_1133"), val = tensor([1, -1, 1546, 64])]; tensor value_states_19_cast_fp16 = reshape(shape = var_1133, x = x_131_cast_fp16)[name = string("value_states_19_cast_fp16")]; bool var_1136_transpose_x_1 = const()[name = string("op_1136_transpose_x_1"), val = bool(false)]; bool var_1136_transpose_y_1 = const()[name = string("op_1136_transpose_y_1"), val = bool(true)]; tensor var_1136_cast_fp16 = matmul(transpose_x = var_1136_transpose_x_1, transpose_y = var_1136_transpose_y_1, x = rotated_17_cast_fp16, y = key_states_19_cast_fp16)[name = string("op_1136_cast_fp16")]; fp16 var_1137_to_fp16 = const()[name = string("op_1137_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_17_cast_fp16 = mul(x = var_1136_cast_fp16, y = var_1137_to_fp16)[name = string("attn_weights_17_cast_fp16")]; tensor x_133_cast_fp16 = add(x = attn_weights_17_cast_fp16, y = causal_mask)[name = string("x_133_cast_fp16")]; tensor reduce_max_4_axes_0 = const()[name = string("reduce_max_4_axes_0"), val = tensor([-1])]; bool reduce_max_4_keep_dims_0 = const()[name = string("reduce_max_4_keep_dims_0"), val = bool(true)]; tensor reduce_max_4_cast_fp16 = reduce_max(axes = reduce_max_4_axes_0, keep_dims = reduce_max_4_keep_dims_0, x = x_133_cast_fp16)[name = string("reduce_max_4_cast_fp16")]; tensor x_135_cast_fp16 = sub(x = x_133_cast_fp16, y = reduce_max_4_cast_fp16)[name = string("x_135_cast_fp16")]; tensor exp_x_9_cast_fp16 = exp(x = x_135_cast_fp16)[name = string("exp_x_9_cast_fp16")]; tensor var_1148_axes_0 = const()[name = string("op_1148_axes_0"), val = tensor([-1])]; bool var_1148_keep_dims_0 = const()[name = string("op_1148_keep_dims_0"), val = bool(true)]; tensor var_1148_cast_fp16 = reduce_sum(axes = var_1148_axes_0, keep_dims = var_1148_keep_dims_0, x = exp_x_9_cast_fp16)[name = string("op_1148_cast_fp16")]; tensor attn_weights_19_cast_fp16 = real_div(x = exp_x_9_cast_fp16, y = var_1148_cast_fp16)[name = string("attn_weights_19_cast_fp16")]; bool attn_output_25_transpose_x_0 = const()[name = string("attn_output_25_transpose_x_0"), val = bool(false)]; bool attn_output_25_transpose_y_0 = const()[name = string("attn_output_25_transpose_y_0"), val = bool(false)]; tensor attn_output_25_cast_fp16 = matmul(transpose_x = attn_output_25_transpose_x_0, transpose_y = attn_output_25_transpose_y_0, x = attn_weights_19_cast_fp16, y = value_states_19_cast_fp16)[name = string("attn_output_25_cast_fp16")]; tensor var_1151_perm_0 = const()[name = string("op_1151_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1153 = const()[name = string("op_1153"), val = tensor([1, 1, 2048])]; tensor var_1151_cast_fp16 = transpose(perm = var_1151_perm_0, x = attn_output_25_cast_fp16)[name = string("transpose_14")]; tensor input_61_cast_fp16 = reshape(shape = var_1153, x = var_1151_cast_fp16)[name = string("input_61_cast_fp16")]; tensor model_model_layers_12_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269204800))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271302016))))[name = string("model_model_layers_12_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_4_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_12_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_61_cast_fp16)[name = string("linear_4_cast_fp16")]; tensor hidden_states_37_cast_fp16 = add(x = hidden_states_33_cast_fp16, y = linear_4_cast_fp16)[name = string("hidden_states_37_cast_fp16")]; tensor mean_19_axes_0 = const()[name = string("mean_19_axes_0"), val = tensor([-1])]; bool mean_19_keep_dims_0 = const()[name = string("mean_19_keep_dims_0"), val = bool(true)]; tensor mean_19_cast_fp16 = reduce_mean(axes = mean_19_axes_0, keep_dims = mean_19_keep_dims_0, x = hidden_states_37_cast_fp16)[name = string("mean_19_cast_fp16")]; tensor input_63_cast_fp16 = sub(x = hidden_states_37_cast_fp16, y = mean_19_cast_fp16)[name = string("input_63_cast_fp16")]; tensor var_1164_axes_0 = const()[name = string("op_1164_axes_0"), val = tensor([-1])]; tensor model_model_layers_12_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_12_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271310272)))]; tensor var_1164_cast_fp16 = layer_norm(axes = var_1164_axes_0, epsilon = var_50_to_fp16, gamma = model_model_layers_12_post_attention_layernorm_weight_to_fp16, x = input_63_cast_fp16)[name = string("op_1164_cast_fp16")]; tensor var_1171 = const()[name = string("op_1171"), val = tensor([0, 2, 1])]; tensor input_65_axes_0 = const()[name = string("input_65_axes_0"), val = tensor([2])]; tensor var_1172 = transpose(perm = var_1171, x = var_1164_cast_fp16)[name = string("transpose_13")]; tensor input_65 = expand_dims(axes = input_65_axes_0, x = var_1172)[name = string("input_65")]; string input_67_pad_type_0 = const()[name = string("input_67_pad_type_0"), val = string("valid")]; tensor input_67_strides_0 = const()[name = string("input_67_strides_0"), val = tensor([1, 1])]; tensor input_67_pad_0 = const()[name = string("input_67_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_67_dilations_0 = const()[name = string("input_67_dilations_0"), val = tensor([1, 1])]; int32 input_67_groups_0 = const()[name = string("input_67_groups_0"), val = int32(1)]; tensor input_67 = conv(dilations = input_67_dilations_0, groups = input_67_groups_0, pad = input_67_pad_0, pad_type = input_67_pad_type_0, strides = input_67_strides_0, weight = model_model_layers_12_mlp_gate_proj_weight_palettized, x = input_65)[name = string("input_67")]; string up_states_9_pad_type_0 = const()[name = string("up_states_9_pad_type_0"), val = string("valid")]; tensor up_states_9_strides_0 = const()[name = string("up_states_9_strides_0"), val = tensor([1, 1])]; tensor up_states_9_pad_0 = const()[name = string("up_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_9_dilations_0 = const()[name = string("up_states_9_dilations_0"), val = tensor([1, 1])]; int32 up_states_9_groups_0 = const()[name = string("up_states_9_groups_0"), val = int32(1)]; tensor up_states_9 = conv(dilations = up_states_9_dilations_0, groups = up_states_9_groups_0, pad = up_states_9_pad_0, pad_type = up_states_9_pad_type_0, strides = up_states_9_strides_0, weight = model_model_layers_12_mlp_up_proj_weight_palettized, x = input_65)[name = string("up_states_9")]; tensor gate_states_9 = silu(x = input_67)[name = string("gate_states_9")]; tensor input_69 = mul(x = gate_states_9, y = up_states_9)[name = string("input_69")]; string hidden_states_39_pad_type_0 = const()[name = string("hidden_states_39_pad_type_0"), val = string("valid")]; tensor hidden_states_39_strides_0 = const()[name = string("hidden_states_39_strides_0"), val = tensor([1, 1])]; tensor hidden_states_39_pad_0 = const()[name = string("hidden_states_39_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_39_dilations_0 = const()[name = string("hidden_states_39_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_39_groups_0 = const()[name = string("hidden_states_39_groups_0"), val = int32(1)]; tensor hidden_states_39 = conv(dilations = hidden_states_39_dilations_0, groups = hidden_states_39_groups_0, pad = hidden_states_39_pad_0, pad_type = hidden_states_39_pad_type_0, strides = hidden_states_39_strides_0, weight = model_model_layers_12_mlp_down_proj_weight_palettized, x = input_69)[name = string("hidden_states_39")]; tensor var_1194_axes_0 = const()[name = string("op_1194_axes_0"), val = tensor([2])]; tensor var_1194 = squeeze(axes = var_1194_axes_0, x = hidden_states_39)[name = string("op_1194")]; tensor var_1195 = const()[name = string("op_1195"), val = tensor([0, 2, 1])]; tensor var_1196 = transpose(perm = var_1195, x = var_1194)[name = string("transpose_12")]; tensor hidden_states_41_cast_fp16 = add(x = hidden_states_37_cast_fp16, y = var_1196)[name = string("hidden_states_41_cast_fp16")]; tensor mean_21_axes_0 = const()[name = string("mean_21_axes_0"), val = tensor([-1])]; bool mean_21_keep_dims_0 = const()[name = string("mean_21_keep_dims_0"), val = bool(true)]; tensor mean_21_cast_fp16 = reduce_mean(axes = mean_21_axes_0, keep_dims = mean_21_keep_dims_0, x = hidden_states_41_cast_fp16)[name = string("mean_21_cast_fp16")]; tensor input_71_cast_fp16 = sub(x = hidden_states_41_cast_fp16, y = mean_21_cast_fp16)[name = string("input_71_cast_fp16")]; tensor var_1204_axes_0 = const()[name = string("op_1204_axes_0"), val = tensor([-1])]; tensor model_model_layers_13_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_13_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271314432)))]; tensor var_1204_cast_fp16 = layer_norm(axes = var_1204_axes_0, epsilon = var_50_to_fp16, gamma = model_model_layers_13_input_layernorm_weight_to_fp16, x = input_71_cast_fp16)[name = string("op_1204_cast_fp16")]; tensor var_1207 = const()[name = string("op_1207"), val = tensor([0, 2, 1])]; tensor var_1209_axes_0 = const()[name = string("op_1209_axes_0"), val = tensor([2])]; tensor var_1208 = transpose(perm = var_1207, x = var_1204_cast_fp16)[name = string("transpose_11")]; tensor var_1209 = expand_dims(axes = var_1209_axes_0, x = var_1208)[name = string("op_1209")]; string var_1216_pad_type_0 = const()[name = string("op_1216_pad_type_0"), val = string("valid")]; tensor var_1216_strides_0 = const()[name = string("op_1216_strides_0"), val = tensor([1, 1])]; tensor var_1216_pad_0 = const()[name = string("op_1216_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1216_dilations_0 = const()[name = string("op_1216_dilations_0"), val = tensor([1, 1])]; int32 var_1216_groups_0 = const()[name = string("op_1216_groups_0"), val = int32(1)]; tensor var_1216 = conv(dilations = var_1216_dilations_0, groups = var_1216_groups_0, pad = var_1216_pad_0, pad_type = var_1216_pad_type_0, strides = var_1216_strides_0, weight = model_model_layers_13_self_attn_q_proj_weight_palettized, x = var_1209)[name = string("op_1216")]; tensor var_1217 = const()[name = string("op_1217"), val = tensor([1, 32, 1, 64])]; tensor var_1218 = reshape(shape = var_1217, x = var_1216)[name = string("op_1218")]; string var_1225_pad_type_0 = const()[name = string("op_1225_pad_type_0"), val = string("valid")]; tensor var_1225_strides_0 = const()[name = string("op_1225_strides_0"), val = tensor([1, 1])]; tensor var_1225_pad_0 = const()[name = string("op_1225_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1225_dilations_0 = const()[name = string("op_1225_dilations_0"), val = tensor([1, 1])]; int32 var_1225_groups_0 = const()[name = string("op_1225_groups_0"), val = int32(1)]; tensor var_1225 = conv(dilations = var_1225_dilations_0, groups = var_1225_groups_0, pad = var_1225_pad_0, pad_type = var_1225_pad_type_0, strides = var_1225_strides_0, weight = model_model_layers_13_self_attn_k_proj_weight_palettized, x = var_1209)[name = string("op_1225")]; tensor var_1226 = const()[name = string("op_1226"), val = tensor([1, 8, 1, 64])]; tensor var_1227 = reshape(shape = var_1226, x = var_1225)[name = string("op_1227")]; string var_1234_pad_type_0 = const()[name = string("op_1234_pad_type_0"), val = string("valid")]; tensor var_1234_strides_0 = const()[name = string("op_1234_strides_0"), val = tensor([1, 1])]; tensor var_1234_pad_0 = const()[name = string("op_1234_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1234_dilations_0 = const()[name = string("op_1234_dilations_0"), val = tensor([1, 1])]; int32 var_1234_groups_0 = const()[name = string("op_1234_groups_0"), val = int32(1)]; tensor var_1234 = conv(dilations = var_1234_dilations_0, groups = var_1234_groups_0, pad = var_1234_pad_0, pad_type = var_1234_pad_type_0, strides = var_1234_strides_0, weight = model_model_layers_13_self_attn_v_proj_weight_palettized, x = var_1209)[name = string("op_1234")]; tensor var_1235 = const()[name = string("op_1235"), val = tensor([1, 8, 1, 64])]; tensor var_1236 = reshape(shape = var_1235, x = var_1234)[name = string("op_1236")]; tensor x1_21_begin_0 = const()[name = string("x1_21_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_21_end_0 = const()[name = string("x1_21_end_0"), val = tensor([1, 32, 1, 32])]; tensor x1_21_end_mask_0 = const()[name = string("x1_21_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_21 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = var_1218)[name = string("x1_21")]; tensor x2_21_begin_0 = const()[name = string("x2_21_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_21_end_0 = const()[name = string("x2_21_end_0"), val = tensor([1, 32, 1, 64])]; tensor x2_21_end_mask_0 = const()[name = string("x2_21_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_21 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = var_1218)[name = string("x2_21")]; tensor var_1250_cast_fp16 = mul(x = x1_21, y = cos_3_cast_fp16)[name = string("op_1250_cast_fp16")]; tensor var_1251_cast_fp16 = mul(x = x2_21, y = sin_3_cast_fp16)[name = string("op_1251_cast_fp16")]; tensor var_1252_cast_fp16 = sub(x = var_1250_cast_fp16, y = var_1251_cast_fp16)[name = string("op_1252_cast_fp16")]; tensor var_1253_cast_fp16 = mul(x = x2_21, y = cos_3_cast_fp16)[name = string("op_1253_cast_fp16")]; tensor var_1254_cast_fp16 = mul(x = x1_21, y = sin_3_cast_fp16)[name = string("op_1254_cast_fp16")]; tensor var_1255_cast_fp16 = add(x = var_1253_cast_fp16, y = var_1254_cast_fp16)[name = string("op_1255_cast_fp16")]; bool rotated_21_interleave_0 = const()[name = string("rotated_21_interleave_0"), val = bool(false)]; tensor rotated_21_cast_fp16 = concat(axis = var_55, interleave = rotated_21_interleave_0, values = (var_1252_cast_fp16, var_1255_cast_fp16))[name = string("rotated_21_cast_fp16")]; tensor x1_23_begin_0 = const()[name = string("x1_23_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_23_end_0 = const()[name = string("x1_23_end_0"), val = tensor([1, 8, 1, 32])]; tensor x1_23_end_mask_0 = const()[name = string("x1_23_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_23 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = var_1227)[name = string("x1_23")]; tensor x2_23_begin_0 = const()[name = string("x2_23_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_23_end_0 = const()[name = string("x2_23_end_0"), val = tensor([1, 8, 1, 64])]; tensor x2_23_end_mask_0 = const()[name = string("x2_23_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_23 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = var_1227)[name = string("x2_23")]; tensor var_1271_cast_fp16 = mul(x = x1_23, y = cos_3_cast_fp16)[name = string("op_1271_cast_fp16")]; tensor var_1272_cast_fp16 = mul(x = x2_23, y = sin_3_cast_fp16)[name = string("op_1272_cast_fp16")]; tensor var_1273_cast_fp16 = sub(x = var_1271_cast_fp16, y = var_1272_cast_fp16)[name = string("op_1273_cast_fp16")]; tensor var_1274_cast_fp16 = mul(x = x2_23, y = cos_3_cast_fp16)[name = string("op_1274_cast_fp16")]; tensor var_1275_cast_fp16 = mul(x = x1_23, y = sin_3_cast_fp16)[name = string("op_1275_cast_fp16")]; tensor var_1276_cast_fp16 = add(x = var_1274_cast_fp16, y = var_1275_cast_fp16)[name = string("op_1276_cast_fp16")]; bool rotated_23_interleave_0 = const()[name = string("rotated_23_interleave_0"), val = bool(false)]; tensor rotated_23_cast_fp16 = concat(axis = var_55, interleave = rotated_23_interleave_0, values = (var_1273_cast_fp16, var_1276_cast_fp16))[name = string("rotated_23_cast_fp16")]; tensor expand_dims_60 = const()[name = string("expand_dims_60"), val = tensor([13])]; tensor expand_dims_61 = const()[name = string("expand_dims_61"), val = tensor([0])]; tensor expand_dims_63 = const()[name = string("expand_dims_63"), val = tensor([0])]; tensor expand_dims_64 = const()[name = string("expand_dims_64"), val = tensor([14])]; int32 concat_42_axis_0 = const()[name = string("concat_42_axis_0"), val = int32(0)]; bool concat_42_interleave_0 = const()[name = string("concat_42_interleave_0"), val = bool(false)]; tensor concat_42 = concat(axis = concat_42_axis_0, interleave = concat_42_interleave_0, values = (expand_dims_60, expand_dims_61, current_pos, expand_dims_63))[name = string("concat_42")]; tensor concat_43_values1_0 = const()[name = string("concat_43_values1_0"), val = tensor([0])]; tensor concat_43_values3_0 = const()[name = string("concat_43_values3_0"), val = tensor([0])]; int32 concat_43_axis_0 = const()[name = string("concat_43_axis_0"), val = int32(0)]; bool concat_43_interleave_0 = const()[name = string("concat_43_interleave_0"), val = bool(false)]; tensor concat_43 = concat(axis = concat_43_axis_0, interleave = concat_43_interleave_0, values = (expand_dims_64, concat_43_values1_0, var_356, concat_43_values3_0))[name = string("concat_43")]; tensor model_model_kv_cache_0_internal_tensor_assign_11_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_42, begin_mask = model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0, end = concat_43, end_mask = model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_11_stride_0, update = rotated_23_cast_fp16, x = coreml_update_state_25)[name = string("model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_10_write_state")]; tensor coreml_update_state_26 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_10")]; tensor expand_dims_66 = const()[name = string("expand_dims_66"), val = tensor([29])]; tensor expand_dims_67 = const()[name = string("expand_dims_67"), val = tensor([0])]; tensor expand_dims_69 = const()[name = string("expand_dims_69"), val = tensor([0])]; tensor expand_dims_70 = const()[name = string("expand_dims_70"), val = tensor([30])]; int32 concat_46_axis_0 = const()[name = string("concat_46_axis_0"), val = int32(0)]; bool concat_46_interleave_0 = const()[name = string("concat_46_interleave_0"), val = bool(false)]; tensor concat_46 = concat(axis = concat_46_axis_0, interleave = concat_46_interleave_0, values = (expand_dims_66, expand_dims_67, current_pos, expand_dims_69))[name = string("concat_46")]; tensor concat_47_values1_0 = const()[name = string("concat_47_values1_0"), val = tensor([0])]; tensor concat_47_values3_0 = const()[name = string("concat_47_values3_0"), val = tensor([0])]; int32 concat_47_axis_0 = const()[name = string("concat_47_axis_0"), val = int32(0)]; bool concat_47_interleave_0 = const()[name = string("concat_47_interleave_0"), val = bool(false)]; tensor concat_47 = concat(axis = concat_47_axis_0, interleave = concat_47_interleave_0, values = (expand_dims_70, concat_47_values1_0, var_356, concat_47_values3_0))[name = string("concat_47")]; tensor model_model_kv_cache_0_internal_tensor_assign_12_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_46, begin_mask = model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0, end = concat_47, end_mask = model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_12_stride_0, update = var_1236, x = coreml_update_state_26)[name = string("model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_11_write_state")]; tensor coreml_update_state_27 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_11")]; tensor var_1296_begin_0 = const()[name = string("op_1296_begin_0"), val = tensor([13, 0, 0, 0])]; tensor var_1296_end_0 = const()[name = string("op_1296_end_0"), val = tensor([14, 8, 1546, 64])]; tensor var_1296_end_mask_0 = const()[name = string("op_1296_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1296_cast_fp16 = slice_by_index(begin = var_1296_begin_0, end = var_1296_end_0, end_mask = var_1296_end_mask_0, x = coreml_update_state_27)[name = string("op_1296_cast_fp16")]; tensor K_layer_cache_11_axes_0 = const()[name = string("K_layer_cache_11_axes_0"), val = tensor([0])]; tensor K_layer_cache_11_cast_fp16 = squeeze(axes = K_layer_cache_11_axes_0, x = var_1296_cast_fp16)[name = string("K_layer_cache_11_cast_fp16")]; tensor var_1298_begin_0 = const()[name = string("op_1298_begin_0"), val = tensor([29, 0, 0, 0])]; tensor var_1298_end_0 = const()[name = string("op_1298_end_0"), val = tensor([30, 8, 1546, 64])]; tensor var_1298_end_mask_0 = const()[name = string("op_1298_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1298_cast_fp16 = slice_by_index(begin = var_1298_begin_0, end = var_1298_end_0, end_mask = var_1298_end_mask_0, x = coreml_update_state_27)[name = string("op_1298_cast_fp16")]; tensor V_layer_cache_11_axes_0 = const()[name = string("V_layer_cache_11_axes_0"), val = tensor([0])]; tensor V_layer_cache_11_cast_fp16 = squeeze(axes = V_layer_cache_11_axes_0, x = var_1298_cast_fp16)[name = string("V_layer_cache_11_cast_fp16")]; tensor x_151_axes_0 = const()[name = string("x_151_axes_0"), val = tensor([1])]; tensor x_151_cast_fp16 = expand_dims(axes = x_151_axes_0, x = K_layer_cache_11_cast_fp16)[name = string("x_151_cast_fp16")]; tensor var_1307 = const()[name = string("op_1307"), val = tensor([1, 4, 1, 1])]; tensor x_153_cast_fp16 = tile(reps = var_1307, x = x_151_cast_fp16)[name = string("x_153_cast_fp16")]; tensor var_1311 = const()[name = string("op_1311"), val = tensor([1, -1, 1546, 64])]; tensor key_states_23_cast_fp16 = reshape(shape = var_1311, x = x_153_cast_fp16)[name = string("key_states_23_cast_fp16")]; tensor x_157_axes_0 = const()[name = string("x_157_axes_0"), val = tensor([1])]; tensor x_157_cast_fp16 = expand_dims(axes = x_157_axes_0, x = V_layer_cache_11_cast_fp16)[name = string("x_157_cast_fp16")]; tensor var_1314 = const()[name = string("op_1314"), val = tensor([1, 4, 1, 1])]; tensor x_159_cast_fp16 = tile(reps = var_1314, x = x_157_cast_fp16)[name = string("x_159_cast_fp16")]; tensor var_1318 = const()[name = string("op_1318"), val = tensor([1, -1, 1546, 64])]; tensor value_states_23_cast_fp16 = reshape(shape = var_1318, x = x_159_cast_fp16)[name = string("value_states_23_cast_fp16")]; bool var_1321_transpose_x_1 = const()[name = string("op_1321_transpose_x_1"), val = bool(false)]; bool var_1321_transpose_y_1 = const()[name = string("op_1321_transpose_y_1"), val = bool(true)]; tensor var_1321_cast_fp16 = matmul(transpose_x = var_1321_transpose_x_1, transpose_y = var_1321_transpose_y_1, x = rotated_21_cast_fp16, y = key_states_23_cast_fp16)[name = string("op_1321_cast_fp16")]; fp16 var_1322_to_fp16 = const()[name = string("op_1322_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_21_cast_fp16 = mul(x = var_1321_cast_fp16, y = var_1322_to_fp16)[name = string("attn_weights_21_cast_fp16")]; tensor x_161_cast_fp16 = add(x = attn_weights_21_cast_fp16, y = causal_mask)[name = string("x_161_cast_fp16")]; tensor reduce_max_5_axes_0 = const()[name = string("reduce_max_5_axes_0"), val = tensor([-1])]; bool reduce_max_5_keep_dims_0 = const()[name = string("reduce_max_5_keep_dims_0"), val = bool(true)]; tensor reduce_max_5_cast_fp16 = reduce_max(axes = reduce_max_5_axes_0, keep_dims = reduce_max_5_keep_dims_0, x = x_161_cast_fp16)[name = string("reduce_max_5_cast_fp16")]; tensor x_163_cast_fp16 = sub(x = x_161_cast_fp16, y = reduce_max_5_cast_fp16)[name = string("x_163_cast_fp16")]; tensor exp_x_11_cast_fp16 = exp(x = x_163_cast_fp16)[name = string("exp_x_11_cast_fp16")]; tensor var_1333_axes_0 = const()[name = string("op_1333_axes_0"), val = tensor([-1])]; bool var_1333_keep_dims_0 = const()[name = string("op_1333_keep_dims_0"), val = bool(true)]; tensor var_1333_cast_fp16 = reduce_sum(axes = var_1333_axes_0, keep_dims = var_1333_keep_dims_0, x = exp_x_11_cast_fp16)[name = string("op_1333_cast_fp16")]; tensor attn_weights_23_cast_fp16 = real_div(x = exp_x_11_cast_fp16, y = var_1333_cast_fp16)[name = string("attn_weights_23_cast_fp16")]; bool attn_output_31_transpose_x_0 = const()[name = string("attn_output_31_transpose_x_0"), val = bool(false)]; bool attn_output_31_transpose_y_0 = const()[name = string("attn_output_31_transpose_y_0"), val = bool(false)]; tensor attn_output_31_cast_fp16 = matmul(transpose_x = attn_output_31_transpose_x_0, transpose_y = attn_output_31_transpose_y_0, x = attn_weights_23_cast_fp16, y = value_states_23_cast_fp16)[name = string("attn_output_31_cast_fp16")]; tensor var_1336_perm_0 = const()[name = string("op_1336_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1338 = const()[name = string("op_1338"), val = tensor([1, 1, 2048])]; tensor var_1336_cast_fp16 = transpose(perm = var_1336_perm_0, x = attn_output_31_cast_fp16)[name = string("transpose_10")]; tensor input_75_cast_fp16 = reshape(shape = var_1338, x = var_1336_cast_fp16)[name = string("input_75_cast_fp16")]; tensor model_model_layers_13_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271318592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273415808))))[name = string("model_model_layers_13_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_5_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_13_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_75_cast_fp16)[name = string("linear_5_cast_fp16")]; tensor hidden_states_45_cast_fp16 = add(x = hidden_states_41_cast_fp16, y = linear_5_cast_fp16)[name = string("hidden_states_45_cast_fp16")]; tensor mean_23_axes_0 = const()[name = string("mean_23_axes_0"), val = tensor([-1])]; bool mean_23_keep_dims_0 = const()[name = string("mean_23_keep_dims_0"), val = bool(true)]; tensor mean_23_cast_fp16 = reduce_mean(axes = mean_23_axes_0, keep_dims = mean_23_keep_dims_0, x = hidden_states_45_cast_fp16)[name = string("mean_23_cast_fp16")]; tensor input_77_cast_fp16 = sub(x = hidden_states_45_cast_fp16, y = mean_23_cast_fp16)[name = string("input_77_cast_fp16")]; tensor var_1349_axes_0 = const()[name = string("op_1349_axes_0"), val = tensor([-1])]; tensor model_model_layers_13_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_13_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273424064)))]; tensor var_1349_cast_fp16 = layer_norm(axes = var_1349_axes_0, epsilon = var_50_to_fp16, gamma = model_model_layers_13_post_attention_layernorm_weight_to_fp16, x = input_77_cast_fp16)[name = string("op_1349_cast_fp16")]; tensor var_1356 = const()[name = string("op_1356"), val = tensor([0, 2, 1])]; tensor input_79_axes_0 = const()[name = string("input_79_axes_0"), val = tensor([2])]; tensor var_1357 = transpose(perm = var_1356, x = var_1349_cast_fp16)[name = string("transpose_9")]; tensor input_79 = expand_dims(axes = input_79_axes_0, x = var_1357)[name = string("input_79")]; string input_81_pad_type_0 = const()[name = string("input_81_pad_type_0"), val = string("valid")]; tensor input_81_strides_0 = const()[name = string("input_81_strides_0"), val = tensor([1, 1])]; tensor input_81_pad_0 = const()[name = string("input_81_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_81_dilations_0 = const()[name = string("input_81_dilations_0"), val = tensor([1, 1])]; int32 input_81_groups_0 = const()[name = string("input_81_groups_0"), val = int32(1)]; tensor input_81 = conv(dilations = input_81_dilations_0, groups = input_81_groups_0, pad = input_81_pad_0, pad_type = input_81_pad_type_0, strides = input_81_strides_0, weight = model_model_layers_13_mlp_gate_proj_weight_palettized, x = input_79)[name = string("input_81")]; string up_states_11_pad_type_0 = const()[name = string("up_states_11_pad_type_0"), val = string("valid")]; tensor up_states_11_strides_0 = const()[name = string("up_states_11_strides_0"), val = tensor([1, 1])]; tensor up_states_11_pad_0 = const()[name = string("up_states_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_11_dilations_0 = const()[name = string("up_states_11_dilations_0"), val = tensor([1, 1])]; int32 up_states_11_groups_0 = const()[name = string("up_states_11_groups_0"), val = int32(1)]; tensor up_states_11 = conv(dilations = up_states_11_dilations_0, groups = up_states_11_groups_0, pad = up_states_11_pad_0, pad_type = up_states_11_pad_type_0, strides = up_states_11_strides_0, weight = model_model_layers_13_mlp_up_proj_weight_palettized, x = input_79)[name = string("up_states_11")]; tensor gate_states_11 = silu(x = input_81)[name = string("gate_states_11")]; tensor input_83 = mul(x = gate_states_11, y = up_states_11)[name = string("input_83")]; string hidden_states_47_pad_type_0 = const()[name = string("hidden_states_47_pad_type_0"), val = string("valid")]; tensor hidden_states_47_strides_0 = const()[name = string("hidden_states_47_strides_0"), val = tensor([1, 1])]; tensor hidden_states_47_pad_0 = const()[name = string("hidden_states_47_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_47_dilations_0 = const()[name = string("hidden_states_47_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_47_groups_0 = const()[name = string("hidden_states_47_groups_0"), val = int32(1)]; tensor hidden_states_47 = conv(dilations = hidden_states_47_dilations_0, groups = hidden_states_47_groups_0, pad = hidden_states_47_pad_0, pad_type = hidden_states_47_pad_type_0, strides = hidden_states_47_strides_0, weight = model_model_layers_13_mlp_down_proj_weight_palettized, x = input_83)[name = string("hidden_states_47")]; tensor var_1379_axes_0 = const()[name = string("op_1379_axes_0"), val = tensor([2])]; tensor var_1379 = squeeze(axes = var_1379_axes_0, x = hidden_states_47)[name = string("op_1379")]; tensor var_1380 = const()[name = string("op_1380"), val = tensor([0, 2, 1])]; tensor var_1381 = transpose(perm = var_1380, x = var_1379)[name = string("transpose_8")]; tensor hidden_states_49_cast_fp16 = add(x = hidden_states_45_cast_fp16, y = var_1381)[name = string("hidden_states_49_cast_fp16")]; tensor mean_25_axes_0 = const()[name = string("mean_25_axes_0"), val = tensor([-1])]; bool mean_25_keep_dims_0 = const()[name = string("mean_25_keep_dims_0"), val = bool(true)]; tensor mean_25_cast_fp16 = reduce_mean(axes = mean_25_axes_0, keep_dims = mean_25_keep_dims_0, x = hidden_states_49_cast_fp16)[name = string("mean_25_cast_fp16")]; tensor input_85_cast_fp16 = sub(x = hidden_states_49_cast_fp16, y = mean_25_cast_fp16)[name = string("input_85_cast_fp16")]; tensor var_1389_axes_0 = const()[name = string("op_1389_axes_0"), val = tensor([-1])]; tensor model_model_layers_14_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_14_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273428224)))]; tensor var_1389_cast_fp16 = layer_norm(axes = var_1389_axes_0, epsilon = var_50_to_fp16, gamma = model_model_layers_14_input_layernorm_weight_to_fp16, x = input_85_cast_fp16)[name = string("op_1389_cast_fp16")]; tensor var_1392 = const()[name = string("op_1392"), val = tensor([0, 2, 1])]; tensor var_1394_axes_0 = const()[name = string("op_1394_axes_0"), val = tensor([2])]; tensor var_1393 = transpose(perm = var_1392, x = var_1389_cast_fp16)[name = string("transpose_7")]; tensor var_1394 = expand_dims(axes = var_1394_axes_0, x = var_1393)[name = string("op_1394")]; string var_1401_pad_type_0 = const()[name = string("op_1401_pad_type_0"), val = string("valid")]; tensor var_1401_strides_0 = const()[name = string("op_1401_strides_0"), val = tensor([1, 1])]; tensor var_1401_pad_0 = const()[name = string("op_1401_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1401_dilations_0 = const()[name = string("op_1401_dilations_0"), val = tensor([1, 1])]; int32 var_1401_groups_0 = const()[name = string("op_1401_groups_0"), val = int32(1)]; tensor var_1401 = conv(dilations = var_1401_dilations_0, groups = var_1401_groups_0, pad = var_1401_pad_0, pad_type = var_1401_pad_type_0, strides = var_1401_strides_0, weight = model_model_layers_14_self_attn_q_proj_weight_palettized, x = var_1394)[name = string("op_1401")]; tensor var_1402 = const()[name = string("op_1402"), val = tensor([1, 32, 1, 64])]; tensor var_1403 = reshape(shape = var_1402, x = var_1401)[name = string("op_1403")]; string var_1410_pad_type_0 = const()[name = string("op_1410_pad_type_0"), val = string("valid")]; tensor var_1410_strides_0 = const()[name = string("op_1410_strides_0"), val = tensor([1, 1])]; tensor var_1410_pad_0 = const()[name = string("op_1410_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1410_dilations_0 = const()[name = string("op_1410_dilations_0"), val = tensor([1, 1])]; int32 var_1410_groups_0 = const()[name = string("op_1410_groups_0"), val = int32(1)]; tensor var_1410 = conv(dilations = var_1410_dilations_0, groups = var_1410_groups_0, pad = var_1410_pad_0, pad_type = var_1410_pad_type_0, strides = var_1410_strides_0, weight = model_model_layers_14_self_attn_k_proj_weight_palettized, x = var_1394)[name = string("op_1410")]; tensor var_1411 = const()[name = string("op_1411"), val = tensor([1, 8, 1, 64])]; tensor var_1412 = reshape(shape = var_1411, x = var_1410)[name = string("op_1412")]; string var_1419_pad_type_0 = const()[name = string("op_1419_pad_type_0"), val = string("valid")]; tensor var_1419_strides_0 = const()[name = string("op_1419_strides_0"), val = tensor([1, 1])]; tensor var_1419_pad_0 = const()[name = string("op_1419_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1419_dilations_0 = const()[name = string("op_1419_dilations_0"), val = tensor([1, 1])]; int32 var_1419_groups_0 = const()[name = string("op_1419_groups_0"), val = int32(1)]; tensor var_1419 = conv(dilations = var_1419_dilations_0, groups = var_1419_groups_0, pad = var_1419_pad_0, pad_type = var_1419_pad_type_0, strides = var_1419_strides_0, weight = model_model_layers_14_self_attn_v_proj_weight_palettized, x = var_1394)[name = string("op_1419")]; tensor var_1420 = const()[name = string("op_1420"), val = tensor([1, 8, 1, 64])]; tensor var_1421 = reshape(shape = var_1420, x = var_1419)[name = string("op_1421")]; tensor x1_25_begin_0 = const()[name = string("x1_25_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_25_end_0 = const()[name = string("x1_25_end_0"), val = tensor([1, 32, 1, 32])]; tensor x1_25_end_mask_0 = const()[name = string("x1_25_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_25 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = var_1403)[name = string("x1_25")]; tensor x2_25_begin_0 = const()[name = string("x2_25_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_25_end_0 = const()[name = string("x2_25_end_0"), val = tensor([1, 32, 1, 64])]; tensor x2_25_end_mask_0 = const()[name = string("x2_25_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_25 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = var_1403)[name = string("x2_25")]; tensor var_1435_cast_fp16 = mul(x = x1_25, y = cos_3_cast_fp16)[name = string("op_1435_cast_fp16")]; tensor var_1436_cast_fp16 = mul(x = x2_25, y = sin_3_cast_fp16)[name = string("op_1436_cast_fp16")]; tensor var_1437_cast_fp16 = sub(x = var_1435_cast_fp16, y = var_1436_cast_fp16)[name = string("op_1437_cast_fp16")]; tensor var_1438_cast_fp16 = mul(x = x2_25, y = cos_3_cast_fp16)[name = string("op_1438_cast_fp16")]; tensor var_1439_cast_fp16 = mul(x = x1_25, y = sin_3_cast_fp16)[name = string("op_1439_cast_fp16")]; tensor var_1440_cast_fp16 = add(x = var_1438_cast_fp16, y = var_1439_cast_fp16)[name = string("op_1440_cast_fp16")]; bool rotated_25_interleave_0 = const()[name = string("rotated_25_interleave_0"), val = bool(false)]; tensor rotated_25_cast_fp16 = concat(axis = var_55, interleave = rotated_25_interleave_0, values = (var_1437_cast_fp16, var_1440_cast_fp16))[name = string("rotated_25_cast_fp16")]; tensor x1_27_begin_0 = const()[name = string("x1_27_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_27_end_0 = const()[name = string("x1_27_end_0"), val = tensor([1, 8, 1, 32])]; tensor x1_27_end_mask_0 = const()[name = string("x1_27_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_27 = slice_by_index(begin = x1_27_begin_0, end = x1_27_end_0, end_mask = x1_27_end_mask_0, x = var_1412)[name = string("x1_27")]; tensor x2_27_begin_0 = const()[name = string("x2_27_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_27_end_0 = const()[name = string("x2_27_end_0"), val = tensor([1, 8, 1, 64])]; tensor x2_27_end_mask_0 = const()[name = string("x2_27_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_27 = slice_by_index(begin = x2_27_begin_0, end = x2_27_end_0, end_mask = x2_27_end_mask_0, x = var_1412)[name = string("x2_27")]; tensor var_1456_cast_fp16 = mul(x = x1_27, y = cos_3_cast_fp16)[name = string("op_1456_cast_fp16")]; tensor var_1457_cast_fp16 = mul(x = x2_27, y = sin_3_cast_fp16)[name = string("op_1457_cast_fp16")]; tensor var_1458_cast_fp16 = sub(x = var_1456_cast_fp16, y = var_1457_cast_fp16)[name = string("op_1458_cast_fp16")]; tensor var_1459_cast_fp16 = mul(x = x2_27, y = cos_3_cast_fp16)[name = string("op_1459_cast_fp16")]; tensor var_1460_cast_fp16 = mul(x = x1_27, y = sin_3_cast_fp16)[name = string("op_1460_cast_fp16")]; tensor var_1461_cast_fp16 = add(x = var_1459_cast_fp16, y = var_1460_cast_fp16)[name = string("op_1461_cast_fp16")]; bool rotated_27_interleave_0 = const()[name = string("rotated_27_interleave_0"), val = bool(false)]; tensor rotated_27_cast_fp16 = concat(axis = var_55, interleave = rotated_27_interleave_0, values = (var_1458_cast_fp16, var_1461_cast_fp16))[name = string("rotated_27_cast_fp16")]; tensor expand_dims_72 = const()[name = string("expand_dims_72"), val = tensor([14])]; tensor expand_dims_73 = const()[name = string("expand_dims_73"), val = tensor([0])]; tensor expand_dims_75 = const()[name = string("expand_dims_75"), val = tensor([0])]; tensor expand_dims_76 = const()[name = string("expand_dims_76"), val = tensor([15])]; int32 concat_50_axis_0 = const()[name = string("concat_50_axis_0"), val = int32(0)]; bool concat_50_interleave_0 = const()[name = string("concat_50_interleave_0"), val = bool(false)]; tensor concat_50 = concat(axis = concat_50_axis_0, interleave = concat_50_interleave_0, values = (expand_dims_72, expand_dims_73, current_pos, expand_dims_75))[name = string("concat_50")]; tensor concat_51_values1_0 = const()[name = string("concat_51_values1_0"), val = tensor([0])]; tensor concat_51_values3_0 = const()[name = string("concat_51_values3_0"), val = tensor([0])]; int32 concat_51_axis_0 = const()[name = string("concat_51_axis_0"), val = int32(0)]; bool concat_51_interleave_0 = const()[name = string("concat_51_interleave_0"), val = bool(false)]; tensor concat_51 = concat(axis = concat_51_axis_0, interleave = concat_51_interleave_0, values = (expand_dims_76, concat_51_values1_0, var_356, concat_51_values3_0))[name = string("concat_51")]; tensor model_model_kv_cache_0_internal_tensor_assign_13_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_50, begin_mask = model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0, end = concat_51, end_mask = model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_13_stride_0, update = rotated_27_cast_fp16, x = coreml_update_state_27)[name = string("model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_12_write_state")]; tensor coreml_update_state_28 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_12")]; tensor expand_dims_78 = const()[name = string("expand_dims_78"), val = tensor([30])]; tensor expand_dims_79 = const()[name = string("expand_dims_79"), val = tensor([0])]; tensor expand_dims_81 = const()[name = string("expand_dims_81"), val = tensor([0])]; tensor expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor([31])]; int32 concat_54_axis_0 = const()[name = string("concat_54_axis_0"), val = int32(0)]; bool concat_54_interleave_0 = const()[name = string("concat_54_interleave_0"), val = bool(false)]; tensor concat_54 = concat(axis = concat_54_axis_0, interleave = concat_54_interleave_0, values = (expand_dims_78, expand_dims_79, current_pos, expand_dims_81))[name = string("concat_54")]; tensor concat_55_values1_0 = const()[name = string("concat_55_values1_0"), val = tensor([0])]; tensor concat_55_values3_0 = const()[name = string("concat_55_values3_0"), val = tensor([0])]; int32 concat_55_axis_0 = const()[name = string("concat_55_axis_0"), val = int32(0)]; bool concat_55_interleave_0 = const()[name = string("concat_55_interleave_0"), val = bool(false)]; tensor concat_55 = concat(axis = concat_55_axis_0, interleave = concat_55_interleave_0, values = (expand_dims_82, concat_55_values1_0, var_356, concat_55_values3_0))[name = string("concat_55")]; tensor model_model_kv_cache_0_internal_tensor_assign_14_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_54, begin_mask = model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0, end = concat_55, end_mask = model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_14_stride_0, update = var_1421, x = coreml_update_state_28)[name = string("model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_13_write_state")]; tensor coreml_update_state_29 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_13")]; tensor var_1481_begin_0 = const()[name = string("op_1481_begin_0"), val = tensor([14, 0, 0, 0])]; tensor var_1481_end_0 = const()[name = string("op_1481_end_0"), val = tensor([15, 8, 1546, 64])]; tensor var_1481_end_mask_0 = const()[name = string("op_1481_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1481_cast_fp16 = slice_by_index(begin = var_1481_begin_0, end = var_1481_end_0, end_mask = var_1481_end_mask_0, x = coreml_update_state_29)[name = string("op_1481_cast_fp16")]; tensor K_layer_cache_13_axes_0 = const()[name = string("K_layer_cache_13_axes_0"), val = tensor([0])]; tensor K_layer_cache_13_cast_fp16 = squeeze(axes = K_layer_cache_13_axes_0, x = var_1481_cast_fp16)[name = string("K_layer_cache_13_cast_fp16")]; tensor var_1483_begin_0 = const()[name = string("op_1483_begin_0"), val = tensor([30, 0, 0, 0])]; tensor var_1483_end_0 = const()[name = string("op_1483_end_0"), val = tensor([31, 8, 1546, 64])]; tensor var_1483_end_mask_0 = const()[name = string("op_1483_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1483_cast_fp16 = slice_by_index(begin = var_1483_begin_0, end = var_1483_end_0, end_mask = var_1483_end_mask_0, x = coreml_update_state_29)[name = string("op_1483_cast_fp16")]; tensor V_layer_cache_13_axes_0 = const()[name = string("V_layer_cache_13_axes_0"), val = tensor([0])]; tensor V_layer_cache_13_cast_fp16 = squeeze(axes = V_layer_cache_13_axes_0, x = var_1483_cast_fp16)[name = string("V_layer_cache_13_cast_fp16")]; tensor x_179_axes_0 = const()[name = string("x_179_axes_0"), val = tensor([1])]; tensor x_179_cast_fp16 = expand_dims(axes = x_179_axes_0, x = K_layer_cache_13_cast_fp16)[name = string("x_179_cast_fp16")]; tensor var_1492 = const()[name = string("op_1492"), val = tensor([1, 4, 1, 1])]; tensor x_181_cast_fp16 = tile(reps = var_1492, x = x_179_cast_fp16)[name = string("x_181_cast_fp16")]; tensor var_1496 = const()[name = string("op_1496"), val = tensor([1, -1, 1546, 64])]; tensor key_states_27_cast_fp16 = reshape(shape = var_1496, x = x_181_cast_fp16)[name = string("key_states_27_cast_fp16")]; tensor x_185_axes_0 = const()[name = string("x_185_axes_0"), val = tensor([1])]; tensor x_185_cast_fp16 = expand_dims(axes = x_185_axes_0, x = V_layer_cache_13_cast_fp16)[name = string("x_185_cast_fp16")]; tensor var_1499 = const()[name = string("op_1499"), val = tensor([1, 4, 1, 1])]; tensor x_187_cast_fp16 = tile(reps = var_1499, x = x_185_cast_fp16)[name = string("x_187_cast_fp16")]; tensor var_1503 = const()[name = string("op_1503"), val = tensor([1, -1, 1546, 64])]; tensor value_states_27_cast_fp16 = reshape(shape = var_1503, x = x_187_cast_fp16)[name = string("value_states_27_cast_fp16")]; bool var_1506_transpose_x_1 = const()[name = string("op_1506_transpose_x_1"), val = bool(false)]; bool var_1506_transpose_y_1 = const()[name = string("op_1506_transpose_y_1"), val = bool(true)]; tensor var_1506_cast_fp16 = matmul(transpose_x = var_1506_transpose_x_1, transpose_y = var_1506_transpose_y_1, x = rotated_25_cast_fp16, y = key_states_27_cast_fp16)[name = string("op_1506_cast_fp16")]; fp16 var_1507_to_fp16 = const()[name = string("op_1507_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_25_cast_fp16 = mul(x = var_1506_cast_fp16, y = var_1507_to_fp16)[name = string("attn_weights_25_cast_fp16")]; tensor x_189_cast_fp16 = add(x = attn_weights_25_cast_fp16, y = causal_mask)[name = string("x_189_cast_fp16")]; tensor reduce_max_6_axes_0 = const()[name = string("reduce_max_6_axes_0"), val = tensor([-1])]; bool reduce_max_6_keep_dims_0 = const()[name = string("reduce_max_6_keep_dims_0"), val = bool(true)]; tensor reduce_max_6_cast_fp16 = reduce_max(axes = reduce_max_6_axes_0, keep_dims = reduce_max_6_keep_dims_0, x = x_189_cast_fp16)[name = string("reduce_max_6_cast_fp16")]; tensor x_191_cast_fp16 = sub(x = x_189_cast_fp16, y = reduce_max_6_cast_fp16)[name = string("x_191_cast_fp16")]; tensor exp_x_13_cast_fp16 = exp(x = x_191_cast_fp16)[name = string("exp_x_13_cast_fp16")]; tensor var_1518_axes_0 = const()[name = string("op_1518_axes_0"), val = tensor([-1])]; bool var_1518_keep_dims_0 = const()[name = string("op_1518_keep_dims_0"), val = bool(true)]; tensor var_1518_cast_fp16 = reduce_sum(axes = var_1518_axes_0, keep_dims = var_1518_keep_dims_0, x = exp_x_13_cast_fp16)[name = string("op_1518_cast_fp16")]; tensor attn_weights_27_cast_fp16 = real_div(x = exp_x_13_cast_fp16, y = var_1518_cast_fp16)[name = string("attn_weights_27_cast_fp16")]; bool attn_output_37_transpose_x_0 = const()[name = string("attn_output_37_transpose_x_0"), val = bool(false)]; bool attn_output_37_transpose_y_0 = const()[name = string("attn_output_37_transpose_y_0"), val = bool(false)]; tensor attn_output_37_cast_fp16 = matmul(transpose_x = attn_output_37_transpose_x_0, transpose_y = attn_output_37_transpose_y_0, x = attn_weights_27_cast_fp16, y = value_states_27_cast_fp16)[name = string("attn_output_37_cast_fp16")]; tensor var_1521_perm_0 = const()[name = string("op_1521_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1523 = const()[name = string("op_1523"), val = tensor([1, 1, 2048])]; tensor var_1521_cast_fp16 = transpose(perm = var_1521_perm_0, x = attn_output_37_cast_fp16)[name = string("transpose_6")]; tensor input_89_cast_fp16 = reshape(shape = var_1523, x = var_1521_cast_fp16)[name = string("input_89_cast_fp16")]; tensor model_model_layers_14_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273432384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275529600))))[name = string("model_model_layers_14_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_6_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_14_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_89_cast_fp16)[name = string("linear_6_cast_fp16")]; tensor hidden_states_53_cast_fp16 = add(x = hidden_states_49_cast_fp16, y = linear_6_cast_fp16)[name = string("hidden_states_53_cast_fp16")]; tensor mean_27_axes_0 = const()[name = string("mean_27_axes_0"), val = tensor([-1])]; bool mean_27_keep_dims_0 = const()[name = string("mean_27_keep_dims_0"), val = bool(true)]; tensor mean_27_cast_fp16 = reduce_mean(axes = mean_27_axes_0, keep_dims = mean_27_keep_dims_0, x = hidden_states_53_cast_fp16)[name = string("mean_27_cast_fp16")]; tensor input_91_cast_fp16 = sub(x = hidden_states_53_cast_fp16, y = mean_27_cast_fp16)[name = string("input_91_cast_fp16")]; tensor var_1534_axes_0 = const()[name = string("op_1534_axes_0"), val = tensor([-1])]; tensor model_model_layers_14_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_14_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275537856)))]; tensor var_1534_cast_fp16 = layer_norm(axes = var_1534_axes_0, epsilon = var_50_to_fp16, gamma = model_model_layers_14_post_attention_layernorm_weight_to_fp16, x = input_91_cast_fp16)[name = string("op_1534_cast_fp16")]; tensor var_1541 = const()[name = string("op_1541"), val = tensor([0, 2, 1])]; tensor input_93_axes_0 = const()[name = string("input_93_axes_0"), val = tensor([2])]; tensor var_1542 = transpose(perm = var_1541, x = var_1534_cast_fp16)[name = string("transpose_5")]; tensor input_93 = expand_dims(axes = input_93_axes_0, x = var_1542)[name = string("input_93")]; string input_95_pad_type_0 = const()[name = string("input_95_pad_type_0"), val = string("valid")]; tensor input_95_strides_0 = const()[name = string("input_95_strides_0"), val = tensor([1, 1])]; tensor input_95_pad_0 = const()[name = string("input_95_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_95_dilations_0 = const()[name = string("input_95_dilations_0"), val = tensor([1, 1])]; int32 input_95_groups_0 = const()[name = string("input_95_groups_0"), val = int32(1)]; tensor input_95 = conv(dilations = input_95_dilations_0, groups = input_95_groups_0, pad = input_95_pad_0, pad_type = input_95_pad_type_0, strides = input_95_strides_0, weight = model_model_layers_14_mlp_gate_proj_weight_palettized, x = input_93)[name = string("input_95")]; string up_states_13_pad_type_0 = const()[name = string("up_states_13_pad_type_0"), val = string("valid")]; tensor up_states_13_strides_0 = const()[name = string("up_states_13_strides_0"), val = tensor([1, 1])]; tensor up_states_13_pad_0 = const()[name = string("up_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_13_dilations_0 = const()[name = string("up_states_13_dilations_0"), val = tensor([1, 1])]; int32 up_states_13_groups_0 = const()[name = string("up_states_13_groups_0"), val = int32(1)]; tensor up_states_13 = conv(dilations = up_states_13_dilations_0, groups = up_states_13_groups_0, pad = up_states_13_pad_0, pad_type = up_states_13_pad_type_0, strides = up_states_13_strides_0, weight = model_model_layers_14_mlp_up_proj_weight_palettized, x = input_93)[name = string("up_states_13")]; tensor gate_states_13 = silu(x = input_95)[name = string("gate_states_13")]; tensor input_97 = mul(x = gate_states_13, y = up_states_13)[name = string("input_97")]; string hidden_states_55_pad_type_0 = const()[name = string("hidden_states_55_pad_type_0"), val = string("valid")]; tensor hidden_states_55_strides_0 = const()[name = string("hidden_states_55_strides_0"), val = tensor([1, 1])]; tensor hidden_states_55_pad_0 = const()[name = string("hidden_states_55_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_55_dilations_0 = const()[name = string("hidden_states_55_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_55_groups_0 = const()[name = string("hidden_states_55_groups_0"), val = int32(1)]; tensor hidden_states_55 = conv(dilations = hidden_states_55_dilations_0, groups = hidden_states_55_groups_0, pad = hidden_states_55_pad_0, pad_type = hidden_states_55_pad_type_0, strides = hidden_states_55_strides_0, weight = model_model_layers_14_mlp_down_proj_weight_palettized, x = input_97)[name = string("hidden_states_55")]; tensor var_1564_axes_0 = const()[name = string("op_1564_axes_0"), val = tensor([2])]; tensor var_1564 = squeeze(axes = var_1564_axes_0, x = hidden_states_55)[name = string("op_1564")]; tensor var_1565 = const()[name = string("op_1565"), val = tensor([0, 2, 1])]; tensor var_1566 = transpose(perm = var_1565, x = var_1564)[name = string("transpose_4")]; tensor hidden_states_57_cast_fp16 = add(x = hidden_states_53_cast_fp16, y = var_1566)[name = string("hidden_states_57_cast_fp16")]; tensor mean_29_axes_0 = const()[name = string("mean_29_axes_0"), val = tensor([-1])]; bool mean_29_keep_dims_0 = const()[name = string("mean_29_keep_dims_0"), val = bool(true)]; tensor mean_29_cast_fp16 = reduce_mean(axes = mean_29_axes_0, keep_dims = mean_29_keep_dims_0, x = hidden_states_57_cast_fp16)[name = string("mean_29_cast_fp16")]; tensor input_99_cast_fp16 = sub(x = hidden_states_57_cast_fp16, y = mean_29_cast_fp16)[name = string("input_99_cast_fp16")]; tensor var_1574_axes_0 = const()[name = string("op_1574_axes_0"), val = tensor([-1])]; tensor model_model_layers_15_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_15_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275542016)))]; tensor var_1574_cast_fp16 = layer_norm(axes = var_1574_axes_0, epsilon = var_50_to_fp16, gamma = model_model_layers_15_input_layernorm_weight_to_fp16, x = input_99_cast_fp16)[name = string("op_1574_cast_fp16")]; tensor var_1577 = const()[name = string("op_1577"), val = tensor([0, 2, 1])]; tensor var_1579_axes_0 = const()[name = string("op_1579_axes_0"), val = tensor([2])]; tensor var_1578 = transpose(perm = var_1577, x = var_1574_cast_fp16)[name = string("transpose_3")]; tensor var_1579 = expand_dims(axes = var_1579_axes_0, x = var_1578)[name = string("op_1579")]; string var_1586_pad_type_0 = const()[name = string("op_1586_pad_type_0"), val = string("valid")]; tensor var_1586_strides_0 = const()[name = string("op_1586_strides_0"), val = tensor([1, 1])]; tensor var_1586_pad_0 = const()[name = string("op_1586_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1586_dilations_0 = const()[name = string("op_1586_dilations_0"), val = tensor([1, 1])]; int32 var_1586_groups_0 = const()[name = string("op_1586_groups_0"), val = int32(1)]; tensor var_1586 = conv(dilations = var_1586_dilations_0, groups = var_1586_groups_0, pad = var_1586_pad_0, pad_type = var_1586_pad_type_0, strides = var_1586_strides_0, weight = model_model_layers_15_self_attn_q_proj_weight_palettized, x = var_1579)[name = string("op_1586")]; tensor var_1587 = const()[name = string("op_1587"), val = tensor([1, 32, 1, 64])]; tensor var_1588 = reshape(shape = var_1587, x = var_1586)[name = string("op_1588")]; string var_1595_pad_type_0 = const()[name = string("op_1595_pad_type_0"), val = string("valid")]; tensor var_1595_strides_0 = const()[name = string("op_1595_strides_0"), val = tensor([1, 1])]; tensor var_1595_pad_0 = const()[name = string("op_1595_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1595_dilations_0 = const()[name = string("op_1595_dilations_0"), val = tensor([1, 1])]; int32 var_1595_groups_0 = const()[name = string("op_1595_groups_0"), val = int32(1)]; tensor var_1595 = conv(dilations = var_1595_dilations_0, groups = var_1595_groups_0, pad = var_1595_pad_0, pad_type = var_1595_pad_type_0, strides = var_1595_strides_0, weight = model_model_layers_15_self_attn_k_proj_weight_palettized, x = var_1579)[name = string("op_1595")]; tensor var_1596 = const()[name = string("op_1596"), val = tensor([1, 8, 1, 64])]; tensor var_1597 = reshape(shape = var_1596, x = var_1595)[name = string("op_1597")]; string var_1604_pad_type_0 = const()[name = string("op_1604_pad_type_0"), val = string("valid")]; tensor var_1604_strides_0 = const()[name = string("op_1604_strides_0"), val = tensor([1, 1])]; tensor var_1604_pad_0 = const()[name = string("op_1604_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1604_dilations_0 = const()[name = string("op_1604_dilations_0"), val = tensor([1, 1])]; int32 var_1604_groups_0 = const()[name = string("op_1604_groups_0"), val = int32(1)]; tensor var_1604 = conv(dilations = var_1604_dilations_0, groups = var_1604_groups_0, pad = var_1604_pad_0, pad_type = var_1604_pad_type_0, strides = var_1604_strides_0, weight = model_model_layers_15_self_attn_v_proj_weight_palettized, x = var_1579)[name = string("op_1604")]; tensor var_1605 = const()[name = string("op_1605"), val = tensor([1, 8, 1, 64])]; tensor var_1606 = reshape(shape = var_1605, x = var_1604)[name = string("op_1606")]; tensor x1_29_begin_0 = const()[name = string("x1_29_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_29_end_0 = const()[name = string("x1_29_end_0"), val = tensor([1, 32, 1, 32])]; tensor x1_29_end_mask_0 = const()[name = string("x1_29_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_29 = slice_by_index(begin = x1_29_begin_0, end = x1_29_end_0, end_mask = x1_29_end_mask_0, x = var_1588)[name = string("x1_29")]; tensor x2_29_begin_0 = const()[name = string("x2_29_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_29_end_0 = const()[name = string("x2_29_end_0"), val = tensor([1, 32, 1, 64])]; tensor x2_29_end_mask_0 = const()[name = string("x2_29_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_29 = slice_by_index(begin = x2_29_begin_0, end = x2_29_end_0, end_mask = x2_29_end_mask_0, x = var_1588)[name = string("x2_29")]; tensor var_1620_cast_fp16 = mul(x = x1_29, y = cos_3_cast_fp16)[name = string("op_1620_cast_fp16")]; tensor var_1621_cast_fp16 = mul(x = x2_29, y = sin_3_cast_fp16)[name = string("op_1621_cast_fp16")]; tensor var_1622_cast_fp16 = sub(x = var_1620_cast_fp16, y = var_1621_cast_fp16)[name = string("op_1622_cast_fp16")]; tensor var_1623_cast_fp16 = mul(x = x2_29, y = cos_3_cast_fp16)[name = string("op_1623_cast_fp16")]; tensor var_1624_cast_fp16 = mul(x = x1_29, y = sin_3_cast_fp16)[name = string("op_1624_cast_fp16")]; tensor var_1625_cast_fp16 = add(x = var_1623_cast_fp16, y = var_1624_cast_fp16)[name = string("op_1625_cast_fp16")]; bool rotated_29_interleave_0 = const()[name = string("rotated_29_interleave_0"), val = bool(false)]; tensor rotated_29_cast_fp16 = concat(axis = var_55, interleave = rotated_29_interleave_0, values = (var_1622_cast_fp16, var_1625_cast_fp16))[name = string("rotated_29_cast_fp16")]; tensor x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_end_0 = const()[name = string("x1_end_0"), val = tensor([1, 8, 1, 32])]; tensor x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = var_1597)[name = string("x1")]; tensor x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_end_0 = const()[name = string("x2_end_0"), val = tensor([1, 8, 1, 64])]; tensor x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = var_1597)[name = string("x2")]; tensor var_1641_cast_fp16 = mul(x = x1, y = cos_3_cast_fp16)[name = string("op_1641_cast_fp16")]; tensor var_1642_cast_fp16 = mul(x = x2, y = sin_3_cast_fp16)[name = string("op_1642_cast_fp16")]; tensor var_1643_cast_fp16 = sub(x = var_1641_cast_fp16, y = var_1642_cast_fp16)[name = string("op_1643_cast_fp16")]; tensor var_1644_cast_fp16 = mul(x = x2, y = cos_3_cast_fp16)[name = string("op_1644_cast_fp16")]; tensor var_1645_cast_fp16 = mul(x = x1, y = sin_3_cast_fp16)[name = string("op_1645_cast_fp16")]; tensor var_1646_cast_fp16 = add(x = var_1644_cast_fp16, y = var_1645_cast_fp16)[name = string("op_1646_cast_fp16")]; bool rotated_interleave_0 = const()[name = string("rotated_interleave_0"), val = bool(false)]; tensor rotated_cast_fp16 = concat(axis = var_55, interleave = rotated_interleave_0, values = (var_1643_cast_fp16, var_1646_cast_fp16))[name = string("rotated_cast_fp16")]; tensor expand_dims_84 = const()[name = string("expand_dims_84"), val = tensor([15])]; tensor expand_dims_85 = const()[name = string("expand_dims_85"), val = tensor([0])]; tensor expand_dims_87 = const()[name = string("expand_dims_87"), val = tensor([0])]; tensor expand_dims_88 = const()[name = string("expand_dims_88"), val = tensor([16])]; int32 concat_58_axis_0 = const()[name = string("concat_58_axis_0"), val = int32(0)]; bool concat_58_interleave_0 = const()[name = string("concat_58_interleave_0"), val = bool(false)]; tensor concat_58 = concat(axis = concat_58_axis_0, interleave = concat_58_interleave_0, values = (expand_dims_84, expand_dims_85, current_pos, expand_dims_87))[name = string("concat_58")]; tensor concat_59_values1_0 = const()[name = string("concat_59_values1_0"), val = tensor([0])]; tensor concat_59_values3_0 = const()[name = string("concat_59_values3_0"), val = tensor([0])]; int32 concat_59_axis_0 = const()[name = string("concat_59_axis_0"), val = int32(0)]; bool concat_59_interleave_0 = const()[name = string("concat_59_interleave_0"), val = bool(false)]; tensor concat_59 = concat(axis = concat_59_axis_0, interleave = concat_59_interleave_0, values = (expand_dims_88, concat_59_values1_0, var_356, concat_59_values3_0))[name = string("concat_59")]; tensor model_model_kv_cache_0_internal_tensor_assign_15_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_58, begin_mask = model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0, end = concat_59, end_mask = model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_15_stride_0, update = rotated_cast_fp16, x = coreml_update_state_29)[name = string("model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_14_write_state")]; tensor coreml_update_state_30 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_14")]; tensor expand_dims_90 = const()[name = string("expand_dims_90"), val = tensor([31])]; tensor expand_dims_91 = const()[name = string("expand_dims_91"), val = tensor([0])]; tensor expand_dims_93 = const()[name = string("expand_dims_93"), val = tensor([0])]; tensor expand_dims_94 = const()[name = string("expand_dims_94"), val = tensor([32])]; int32 concat_62_axis_0 = const()[name = string("concat_62_axis_0"), val = int32(0)]; bool concat_62_interleave_0 = const()[name = string("concat_62_interleave_0"), val = bool(false)]; tensor concat_62 = concat(axis = concat_62_axis_0, interleave = concat_62_interleave_0, values = (expand_dims_90, expand_dims_91, current_pos, expand_dims_93))[name = string("concat_62")]; tensor concat_63_values1_0 = const()[name = string("concat_63_values1_0"), val = tensor([0])]; tensor concat_63_values3_0 = const()[name = string("concat_63_values3_0"), val = tensor([0])]; int32 concat_63_axis_0 = const()[name = string("concat_63_axis_0"), val = int32(0)]; bool concat_63_interleave_0 = const()[name = string("concat_63_interleave_0"), val = bool(false)]; tensor concat_63 = concat(axis = concat_63_axis_0, interleave = concat_63_interleave_0, values = (expand_dims_94, concat_63_values1_0, var_356, concat_63_values3_0))[name = string("concat_63")]; tensor model_model_kv_cache_0_internal_tensor_assign_16_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_62, begin_mask = model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0, end = concat_63, end_mask = model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_16_stride_0, update = var_1606, x = coreml_update_state_30)[name = string("model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_15_write_state")]; tensor coreml_update_state_31 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_15")]; tensor var_1666_begin_0 = const()[name = string("op_1666_begin_0"), val = tensor([15, 0, 0, 0])]; tensor var_1666_end_0 = const()[name = string("op_1666_end_0"), val = tensor([16, 8, 1546, 64])]; tensor var_1666_end_mask_0 = const()[name = string("op_1666_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1666_cast_fp16 = slice_by_index(begin = var_1666_begin_0, end = var_1666_end_0, end_mask = var_1666_end_mask_0, x = coreml_update_state_31)[name = string("op_1666_cast_fp16")]; tensor K_layer_cache_axes_0 = const()[name = string("K_layer_cache_axes_0"), val = tensor([0])]; tensor K_layer_cache_cast_fp16 = squeeze(axes = K_layer_cache_axes_0, x = var_1666_cast_fp16)[name = string("K_layer_cache_cast_fp16")]; tensor var_1668_begin_0 = const()[name = string("op_1668_begin_0"), val = tensor([31, 0, 0, 0])]; tensor var_1668_end_0 = const()[name = string("op_1668_end_0"), val = tensor([1, 8, 1546, 64])]; tensor var_1668_end_mask_0 = const()[name = string("op_1668_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1668_cast_fp16 = slice_by_index(begin = var_1668_begin_0, end = var_1668_end_0, end_mask = var_1668_end_mask_0, x = coreml_update_state_31)[name = string("op_1668_cast_fp16")]; tensor V_layer_cache_axes_0 = const()[name = string("V_layer_cache_axes_0"), val = tensor([0])]; tensor V_layer_cache_cast_fp16 = squeeze(axes = V_layer_cache_axes_0, x = var_1668_cast_fp16)[name = string("V_layer_cache_cast_fp16")]; tensor x_207_axes_0 = const()[name = string("x_207_axes_0"), val = tensor([1])]; tensor x_207_cast_fp16 = expand_dims(axes = x_207_axes_0, x = K_layer_cache_cast_fp16)[name = string("x_207_cast_fp16")]; tensor var_1677 = const()[name = string("op_1677"), val = tensor([1, 4, 1, 1])]; tensor x_209_cast_fp16 = tile(reps = var_1677, x = x_207_cast_fp16)[name = string("x_209_cast_fp16")]; tensor var_1681 = const()[name = string("op_1681"), val = tensor([1, -1, 1546, 64])]; tensor key_states_cast_fp16 = reshape(shape = var_1681, x = x_209_cast_fp16)[name = string("key_states_cast_fp16")]; tensor x_213_axes_0 = const()[name = string("x_213_axes_0"), val = tensor([1])]; tensor x_213_cast_fp16 = expand_dims(axes = x_213_axes_0, x = V_layer_cache_cast_fp16)[name = string("x_213_cast_fp16")]; tensor var_1684 = const()[name = string("op_1684"), val = tensor([1, 4, 1, 1])]; tensor x_215_cast_fp16 = tile(reps = var_1684, x = x_213_cast_fp16)[name = string("x_215_cast_fp16")]; tensor var_1688 = const()[name = string("op_1688"), val = tensor([1, -1, 1546, 64])]; tensor value_states_cast_fp16 = reshape(shape = var_1688, x = x_215_cast_fp16)[name = string("value_states_cast_fp16")]; bool var_1691_transpose_x_1 = const()[name = string("op_1691_transpose_x_1"), val = bool(false)]; bool var_1691_transpose_y_1 = const()[name = string("op_1691_transpose_y_1"), val = bool(true)]; tensor var_1691_cast_fp16 = matmul(transpose_x = var_1691_transpose_x_1, transpose_y = var_1691_transpose_y_1, x = rotated_29_cast_fp16, y = key_states_cast_fp16)[name = string("op_1691_cast_fp16")]; fp16 var_1692_to_fp16 = const()[name = string("op_1692_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_29_cast_fp16 = mul(x = var_1691_cast_fp16, y = var_1692_to_fp16)[name = string("attn_weights_29_cast_fp16")]; tensor x_217_cast_fp16 = add(x = attn_weights_29_cast_fp16, y = causal_mask)[name = string("x_217_cast_fp16")]; tensor reduce_max_7_axes_0 = const()[name = string("reduce_max_7_axes_0"), val = tensor([-1])]; bool reduce_max_7_keep_dims_0 = const()[name = string("reduce_max_7_keep_dims_0"), val = bool(true)]; tensor reduce_max_7_cast_fp16 = reduce_max(axes = reduce_max_7_axes_0, keep_dims = reduce_max_7_keep_dims_0, x = x_217_cast_fp16)[name = string("reduce_max_7_cast_fp16")]; tensor x_219_cast_fp16 = sub(x = x_217_cast_fp16, y = reduce_max_7_cast_fp16)[name = string("x_219_cast_fp16")]; tensor exp_x_cast_fp16 = exp(x = x_219_cast_fp16)[name = string("exp_x_cast_fp16")]; tensor var_1703_axes_0 = const()[name = string("op_1703_axes_0"), val = tensor([-1])]; bool var_1703_keep_dims_0 = const()[name = string("op_1703_keep_dims_0"), val = bool(true)]; tensor var_1703_cast_fp16 = reduce_sum(axes = var_1703_axes_0, keep_dims = var_1703_keep_dims_0, x = exp_x_cast_fp16)[name = string("op_1703_cast_fp16")]; tensor attn_weights_cast_fp16 = real_div(x = exp_x_cast_fp16, y = var_1703_cast_fp16)[name = string("attn_weights_cast_fp16")]; bool attn_output_43_transpose_x_0 = const()[name = string("attn_output_43_transpose_x_0"), val = bool(false)]; bool attn_output_43_transpose_y_0 = const()[name = string("attn_output_43_transpose_y_0"), val = bool(false)]; tensor attn_output_43_cast_fp16 = matmul(transpose_x = attn_output_43_transpose_x_0, transpose_y = attn_output_43_transpose_y_0, x = attn_weights_cast_fp16, y = value_states_cast_fp16)[name = string("attn_output_43_cast_fp16")]; tensor var_1706_perm_0 = const()[name = string("op_1706_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1708 = const()[name = string("op_1708"), val = tensor([1, 1, 2048])]; tensor var_1706_cast_fp16 = transpose(perm = var_1706_perm_0, x = attn_output_43_cast_fp16)[name = string("transpose_2")]; tensor input_103_cast_fp16 = reshape(shape = var_1708, x = var_1706_cast_fp16)[name = string("input_103_cast_fp16")]; tensor model_model_layers_15_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275546176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(277643392))))[name = string("model_model_layers_15_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_7_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_15_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_103_cast_fp16)[name = string("linear_7_cast_fp16")]; tensor hidden_states_61_cast_fp16 = add(x = hidden_states_57_cast_fp16, y = linear_7_cast_fp16)[name = string("hidden_states_61_cast_fp16")]; tensor mean_31_axes_0 = const()[name = string("mean_31_axes_0"), val = tensor([-1])]; bool mean_31_keep_dims_0 = const()[name = string("mean_31_keep_dims_0"), val = bool(true)]; tensor mean_31_cast_fp16 = reduce_mean(axes = mean_31_axes_0, keep_dims = mean_31_keep_dims_0, x = hidden_states_61_cast_fp16)[name = string("mean_31_cast_fp16")]; tensor input_105_cast_fp16 = sub(x = hidden_states_61_cast_fp16, y = mean_31_cast_fp16)[name = string("input_105_cast_fp16")]; tensor var_1719_axes_0 = const()[name = string("op_1719_axes_0"), val = tensor([-1])]; tensor model_model_layers_15_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_15_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(277651648)))]; tensor var_1719_cast_fp16 = layer_norm(axes = var_1719_axes_0, epsilon = var_50_to_fp16, gamma = model_model_layers_15_post_attention_layernorm_weight_to_fp16, x = input_105_cast_fp16)[name = string("op_1719_cast_fp16")]; tensor var_1726 = const()[name = string("op_1726"), val = tensor([0, 2, 1])]; tensor input_107_axes_0 = const()[name = string("input_107_axes_0"), val = tensor([2])]; tensor var_1727 = transpose(perm = var_1726, x = var_1719_cast_fp16)[name = string("transpose_1")]; tensor input_107 = expand_dims(axes = input_107_axes_0, x = var_1727)[name = string("input_107")]; string input_109_pad_type_0 = const()[name = string("input_109_pad_type_0"), val = string("valid")]; tensor input_109_strides_0 = const()[name = string("input_109_strides_0"), val = tensor([1, 1])]; tensor input_109_pad_0 = const()[name = string("input_109_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_109_dilations_0 = const()[name = string("input_109_dilations_0"), val = tensor([1, 1])]; int32 input_109_groups_0 = const()[name = string("input_109_groups_0"), val = int32(1)]; tensor input_109 = conv(dilations = input_109_dilations_0, groups = input_109_groups_0, pad = input_109_pad_0, pad_type = input_109_pad_type_0, strides = input_109_strides_0, weight = model_model_layers_15_mlp_gate_proj_weight_palettized, x = input_107)[name = string("input_109")]; string up_states_pad_type_0 = const()[name = string("up_states_pad_type_0"), val = string("valid")]; tensor up_states_strides_0 = const()[name = string("up_states_strides_0"), val = tensor([1, 1])]; tensor up_states_pad_0 = const()[name = string("up_states_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_dilations_0 = const()[name = string("up_states_dilations_0"), val = tensor([1, 1])]; int32 up_states_groups_0 = const()[name = string("up_states_groups_0"), val = int32(1)]; tensor up_states = conv(dilations = up_states_dilations_0, groups = up_states_groups_0, pad = up_states_pad_0, pad_type = up_states_pad_type_0, strides = up_states_strides_0, weight = model_model_layers_15_mlp_up_proj_weight_palettized, x = input_107)[name = string("up_states")]; tensor gate_states = silu(x = input_109)[name = string("gate_states")]; tensor input_111 = mul(x = gate_states, y = up_states)[name = string("input_111")]; string hidden_states_63_pad_type_0 = const()[name = string("hidden_states_63_pad_type_0"), val = string("valid")]; tensor hidden_states_63_strides_0 = const()[name = string("hidden_states_63_strides_0"), val = tensor([1, 1])]; tensor hidden_states_63_pad_0 = const()[name = string("hidden_states_63_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_63_dilations_0 = const()[name = string("hidden_states_63_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_63_groups_0 = const()[name = string("hidden_states_63_groups_0"), val = int32(1)]; tensor hidden_states_63 = conv(dilations = hidden_states_63_dilations_0, groups = hidden_states_63_groups_0, pad = hidden_states_63_pad_0, pad_type = hidden_states_63_pad_type_0, strides = hidden_states_63_strides_0, weight = model_model_layers_15_mlp_down_proj_weight_palettized, x = input_111)[name = string("hidden_states_63")]; tensor var_1749_axes_0 = const()[name = string("op_1749_axes_0"), val = tensor([2])]; tensor var_1749 = squeeze(axes = var_1749_axes_0, x = hidden_states_63)[name = string("op_1749")]; tensor var_1750 = const()[name = string("op_1750"), val = tensor([0, 2, 1])]; tensor var_1751 = transpose(perm = var_1750, x = var_1749)[name = string("transpose_0")]; tensor hidden_states_cast_fp16 = add(x = hidden_states_61_cast_fp16, y = var_1751)[name = string("hidden_states_cast_fp16")]; tensor mean_axes_0 = const()[name = string("mean_axes_0"), val = tensor([-1])]; bool mean_keep_dims_0 = const()[name = string("mean_keep_dims_0"), val = bool(true)]; tensor mean_cast_fp16 = reduce_mean(axes = mean_axes_0, keep_dims = mean_keep_dims_0, x = hidden_states_cast_fp16)[name = string("mean_cast_fp16")]; tensor input_cast_fp16 = sub(x = hidden_states_cast_fp16, y = mean_cast_fp16)[name = string("input_cast_fp16")]; tensor var_1759_axes_0 = const()[name = string("op_1759_axes_0"), val = tensor([-1])]; tensor model_model_norm_weight_to_fp16 = const()[name = string("model_model_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(277655808)))]; tensor output_hidden_states = layer_norm(axes = var_1759_axes_0, epsilon = var_50_to_fp16, gamma = model_model_norm_weight_to_fp16, x = input_cast_fp16)[name = string("op_1759_cast_fp16")]; tensor position_ids_tmp = identity(x = position_ids)[name = string("position_ids_tmp")]; } -> (output_hidden_states); func prefill(tensor causal_mask, tensor current_pos, tensor hidden_states, state> model_model_kv_cache_0, tensor position_ids) { tensor model_model_layers_8_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2097280))))[name = string("model_model_layers_8_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_8_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2105536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2629888))))[name = string("model_model_layers_8_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_8_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2632000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3156352))))[name = string("model_model_layers_8_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_8_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3158464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11547136))))[name = string("model_model_layers_8_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_8_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11579968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19968640))))[name = string("model_model_layers_8_mlp_up_proj_weight_palettized")]; tensor model_model_layers_8_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20001472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28390144))))[name = string("model_model_layers_8_mlp_down_proj_weight_palettized")]; tensor model_model_layers_9_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28398400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30495616))))[name = string("model_model_layers_9_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_9_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30503872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31028224))))[name = string("model_model_layers_9_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_9_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31030336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31554688))))[name = string("model_model_layers_9_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_9_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31556800))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39945472))))[name = string("model_model_layers_9_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_9_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39978304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48366976))))[name = string("model_model_layers_9_mlp_up_proj_weight_palettized")]; tensor model_model_layers_9_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48399808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56788480))))[name = string("model_model_layers_9_mlp_down_proj_weight_palettized")]; tensor model_model_layers_10_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56796736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58893952))))[name = string("model_model_layers_10_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_10_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58902208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59426560))))[name = string("model_model_layers_10_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_10_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59428672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59953024))))[name = string("model_model_layers_10_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_10_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59955136))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68343808))))[name = string("model_model_layers_10_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_10_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68376640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76765312))))[name = string("model_model_layers_10_mlp_up_proj_weight_palettized")]; tensor model_model_layers_10_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76798144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85186816))))[name = string("model_model_layers_10_mlp_down_proj_weight_palettized")]; tensor model_model_layers_11_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85195072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87292288))))[name = string("model_model_layers_11_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_11_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87300544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87824896))))[name = string("model_model_layers_11_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_11_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87827008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88351360))))[name = string("model_model_layers_11_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_11_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88353472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96742144))))[name = string("model_model_layers_11_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_11_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96774976))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105163648))))[name = string("model_model_layers_11_mlp_up_proj_weight_palettized")]; tensor model_model_layers_11_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105196480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113585152))))[name = string("model_model_layers_11_mlp_down_proj_weight_palettized")]; tensor model_model_layers_12_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113593408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115690624))))[name = string("model_model_layers_12_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_12_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115698880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116223232))))[name = string("model_model_layers_12_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_12_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116225344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116749696))))[name = string("model_model_layers_12_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_12_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116751808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125140480))))[name = string("model_model_layers_12_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_12_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125173312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133561984))))[name = string("model_model_layers_12_mlp_up_proj_weight_palettized")]; tensor model_model_layers_12_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133594816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141983488))))[name = string("model_model_layers_12_mlp_down_proj_weight_palettized")]; tensor model_model_layers_13_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141991744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144088960))))[name = string("model_model_layers_13_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_13_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144097216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144621568))))[name = string("model_model_layers_13_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_13_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144623680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145148032))))[name = string("model_model_layers_13_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_13_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145150144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153538816))))[name = string("model_model_layers_13_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_13_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153571648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161960320))))[name = string("model_model_layers_13_mlp_up_proj_weight_palettized")]; tensor model_model_layers_13_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161993152))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170381824))))[name = string("model_model_layers_13_mlp_down_proj_weight_palettized")]; tensor model_model_layers_14_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170390080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(172487296))))[name = string("model_model_layers_14_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_14_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(172495552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173019904))))[name = string("model_model_layers_14_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_14_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173022016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173546368))))[name = string("model_model_layers_14_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_14_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173548480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(181937152))))[name = string("model_model_layers_14_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_14_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(181969984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190358656))))[name = string("model_model_layers_14_mlp_up_proj_weight_palettized")]; tensor model_model_layers_14_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190391488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198780160))))[name = string("model_model_layers_14_mlp_down_proj_weight_palettized")]; tensor model_model_layers_15_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198788416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200885632))))[name = string("model_model_layers_15_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_15_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200893888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201418240))))[name = string("model_model_layers_15_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_15_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201420352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201944704))))[name = string("model_model_layers_15_self_attn_v_proj_weight_palettized")]; int32 var_50 = const()[name = string("op_50"), val = int32(-1)]; int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; tensor greater_equal_0 = greater_equal(x = position_ids, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(131072)]; tensor add_0 = add(x = position_ids, y = slice_by_index_0)[name = string("add_0")]; tensor select_0 = select(a = position_ids, b = add_0, cond = greater_equal_0)[name = string("select_0")]; int32 var_261_axis_0 = const()[name = string("op_261_axis_0"), val = int32(1)]; int32 var_261_batch_dims_0 = const()[name = string("op_261_batch_dims_0"), val = int32(0)]; bool var_261_validate_indices_0 = const()[name = string("op_261_validate_indices_0"), val = bool(false)]; tensor var_61_to_fp16 = const()[name = string("op_61_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243964032)))]; tensor var_261_cast_fp16 = gather(axis = var_261_axis_0, batch_dims = var_261_batch_dims_0, indices = select_0, validate_indices = var_261_validate_indices_0, x = var_61_to_fp16)[name = string("op_261_cast_fp16")]; tensor var_262 = const()[name = string("op_262"), val = tensor([1, 64, 1, 64])]; tensor cos_1_cast_fp16 = reshape(shape = var_262, x = var_261_cast_fp16)[name = string("cos_1_cast_fp16")]; int32 var_266_axis_0 = const()[name = string("op_266_axis_0"), val = int32(1)]; int32 var_266_batch_dims_0 = const()[name = string("op_266_batch_dims_0"), val = int32(0)]; bool var_266_validate_indices_0 = const()[name = string("op_266_validate_indices_0"), val = bool(false)]; tensor var_56_to_fp16 = const()[name = string("op_56_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227186752)))]; tensor var_266_cast_fp16 = gather(axis = var_266_axis_0, batch_dims = var_266_batch_dims_0, indices = select_0, validate_indices = var_266_validate_indices_0, x = var_56_to_fp16)[name = string("op_266_cast_fp16")]; tensor var_267 = const()[name = string("op_267"), val = tensor([1, 64, 1, 64])]; tensor sin_1_cast_fp16 = reshape(shape = var_267, x = var_266_cast_fp16)[name = string("sin_1_cast_fp16")]; tensor mean_1_axes_0 = const()[name = string("mean_1_axes_0"), val = tensor([-1])]; bool mean_1_keep_dims_0 = const()[name = string("mean_1_keep_dims_0"), val = bool(true)]; tensor mean_1_cast_fp16 = reduce_mean(axes = mean_1_axes_0, keep_dims = mean_1_keep_dims_0, x = hidden_states)[name = string("mean_1_cast_fp16")]; tensor input_1_cast_fp16 = sub(x = hidden_states, y = mean_1_cast_fp16)[name = string("input_1_cast_fp16")]; tensor var_277_axes_0 = const()[name = string("op_277_axes_0"), val = tensor([-1])]; tensor model_model_layers_8_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_8_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(260741312)))]; fp16 var_52_to_fp16 = const()[name = string("op_52_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_277_cast_fp16 = layer_norm(axes = var_277_axes_0, epsilon = var_52_to_fp16, gamma = model_model_layers_8_input_layernorm_weight_to_fp16, x = input_1_cast_fp16)[name = string("op_277_cast_fp16")]; tensor var_281 = const()[name = string("op_281"), val = tensor([0, 2, 1])]; tensor var_283_axes_0 = const()[name = string("op_283_axes_0"), val = tensor([2])]; tensor var_282 = transpose(perm = var_281, x = var_277_cast_fp16)[name = string("transpose_55")]; tensor var_283 = expand_dims(axes = var_283_axes_0, x = var_282)[name = string("op_283")]; string query_states_1_pad_type_0 = const()[name = string("query_states_1_pad_type_0"), val = string("valid")]; tensor query_states_1_strides_0 = const()[name = string("query_states_1_strides_0"), val = tensor([1, 1])]; tensor query_states_1_pad_0 = const()[name = string("query_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_1_dilations_0 = const()[name = string("query_states_1_dilations_0"), val = tensor([1, 1])]; int32 query_states_1_groups_0 = const()[name = string("query_states_1_groups_0"), val = int32(1)]; tensor query_states_1 = conv(dilations = query_states_1_dilations_0, groups = query_states_1_groups_0, pad = query_states_1_pad_0, pad_type = query_states_1_pad_type_0, strides = query_states_1_strides_0, weight = model_model_layers_8_self_attn_q_proj_weight_palettized, x = var_283)[name = string("query_states_1")]; string key_states_1_pad_type_0 = const()[name = string("key_states_1_pad_type_0"), val = string("valid")]; tensor key_states_1_strides_0 = const()[name = string("key_states_1_strides_0"), val = tensor([1, 1])]; tensor key_states_1_pad_0 = const()[name = string("key_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_1_dilations_0 = const()[name = string("key_states_1_dilations_0"), val = tensor([1, 1])]; int32 key_states_1_groups_0 = const()[name = string("key_states_1_groups_0"), val = int32(1)]; tensor key_states_1 = conv(dilations = key_states_1_dilations_0, groups = key_states_1_groups_0, pad = key_states_1_pad_0, pad_type = key_states_1_pad_type_0, strides = key_states_1_strides_0, weight = model_model_layers_8_self_attn_k_proj_weight_palettized, x = var_283)[name = string("key_states_1")]; string value_states_1_pad_type_0 = const()[name = string("value_states_1_pad_type_0"), val = string("valid")]; tensor value_states_1_strides_0 = const()[name = string("value_states_1_strides_0"), val = tensor([1, 1])]; tensor value_states_1_pad_0 = const()[name = string("value_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_1_dilations_0 = const()[name = string("value_states_1_dilations_0"), val = tensor([1, 1])]; int32 value_states_1_groups_0 = const()[name = string("value_states_1_groups_0"), val = int32(1)]; tensor value_states_1 = conv(dilations = value_states_1_dilations_0, groups = value_states_1_groups_0, pad = value_states_1_pad_0, pad_type = value_states_1_pad_type_0, strides = value_states_1_strides_0, weight = model_model_layers_8_self_attn_v_proj_weight_palettized, x = var_283)[name = string("value_states_1")]; tensor var_303 = const()[name = string("op_303"), val = tensor([1, 32, 64, 64])]; tensor var_304 = reshape(shape = var_303, x = query_states_1)[name = string("op_304")]; tensor var_305 = const()[name = string("op_305"), val = tensor([0, 1, 3, 2])]; tensor var_307 = const()[name = string("op_307"), val = tensor([1, 8, 64, 64])]; tensor var_308 = reshape(shape = var_307, x = key_states_1)[name = string("op_308")]; tensor var_309 = const()[name = string("op_309"), val = tensor([0, 1, 3, 2])]; tensor var_311 = const()[name = string("op_311"), val = tensor([1, 8, 64, 64])]; tensor var_312 = reshape(shape = var_311, x = value_states_1)[name = string("op_312")]; tensor var_313 = const()[name = string("op_313"), val = tensor([0, 1, 3, 2])]; tensor var_315 = const()[name = string("op_315"), val = tensor([0, 2, 1, 3])]; tensor var_317 = const()[name = string("op_317"), val = tensor([0, 2, 1, 3])]; tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 32, 64, 32])]; tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_1 = transpose(perm = var_305, x = var_304)[name = string("transpose_54")]; tensor x1_1 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = x_1)[name = string("x1_1")]; tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 32, 64, 64])]; tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_1 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = x_1)[name = string("x2_1")]; tensor cos_7_begin_0 = const()[name = string("cos_7_begin_0"), val = tensor([0, 0, 0, 0])]; tensor cos_7_end_0 = const()[name = string("cos_7_end_0"), val = tensor([1, 1, 64, 32])]; tensor cos_7_end_mask_0 = const()[name = string("cos_7_end_mask_0"), val = tensor([true, true, true, false])]; tensor cos_5 = transpose(perm = var_315, x = cos_1_cast_fp16)[name = string("transpose_53")]; tensor cos_7 = slice_by_index(begin = cos_7_begin_0, end = cos_7_end_0, end_mask = cos_7_end_mask_0, x = cos_5)[name = string("cos_7")]; tensor sin_7_begin_0 = const()[name = string("sin_7_begin_0"), val = tensor([0, 0, 0, 0])]; tensor sin_7_end_0 = const()[name = string("sin_7_end_0"), val = tensor([1, 1, 64, 32])]; tensor sin_7_end_mask_0 = const()[name = string("sin_7_end_mask_0"), val = tensor([true, true, true, false])]; tensor sin_5 = transpose(perm = var_317, x = sin_1_cast_fp16)[name = string("transpose_52")]; tensor sin_7 = slice_by_index(begin = sin_7_begin_0, end = sin_7_end_0, end_mask = sin_7_end_mask_0, x = sin_5)[name = string("sin_7")]; tensor var_331 = mul(x = x1_1, y = cos_7)[name = string("op_331")]; tensor var_332 = mul(x = x2_1, y = sin_7)[name = string("op_332")]; tensor var_333 = sub(x = var_331, y = var_332)[name = string("op_333")]; tensor var_334 = mul(x = x2_1, y = cos_7)[name = string("op_334")]; tensor var_335 = mul(x = x1_1, y = sin_7)[name = string("op_335")]; tensor var_336 = add(x = var_334, y = var_335)[name = string("op_336")]; bool rotated_1_interleave_0 = const()[name = string("rotated_1_interleave_0"), val = bool(false)]; tensor rotated_1 = concat(axis = var_50, interleave = rotated_1_interleave_0, values = (var_333, var_336))[name = string("rotated_1")]; tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 8, 64, 32])]; tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_5 = transpose(perm = var_309, x = var_308)[name = string("transpose_51")]; tensor x1_3 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = x_5)[name = string("x1_3")]; tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 8, 64, 64])]; tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_3 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = x_5)[name = string("x2_3")]; tensor var_352 = mul(x = x1_3, y = cos_7)[name = string("op_352")]; tensor var_353 = mul(x = x2_3, y = sin_7)[name = string("op_353")]; tensor var_354 = sub(x = var_352, y = var_353)[name = string("op_354")]; tensor var_355 = mul(x = x2_3, y = cos_7)[name = string("op_355")]; tensor var_356 = mul(x = x1_3, y = sin_7)[name = string("op_356")]; tensor var_357 = add(x = var_355, y = var_356)[name = string("op_357")]; bool rotated_3_interleave_0 = const()[name = string("rotated_3_interleave_0"), val = bool(false)]; tensor rotated_3 = concat(axis = var_50, interleave = rotated_3_interleave_0, values = (var_354, var_357))[name = string("rotated_3")]; tensor seq_length_1 = const()[name = string("seq_length_1"), val = tensor([64])]; tensor var_366 = add(x = current_pos, y = seq_length_1)[name = string("op_366")]; tensor read_state_0 = read_state(input = model_model_kv_cache_0)[name = string("read_state_0")]; tensor expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor([8])]; tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor([0])]; tensor expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor([0])]; tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor([9])]; int32 concat_2_axis_0 = const()[name = string("concat_2_axis_0"), val = int32(0)]; bool concat_2_interleave_0 = const()[name = string("concat_2_interleave_0"), val = bool(false)]; tensor concat_2 = concat(axis = concat_2_axis_0, interleave = concat_2_interleave_0, values = (expand_dims_0, expand_dims_1, current_pos, expand_dims_3))[name = string("concat_2")]; tensor concat_3_values1_0 = const()[name = string("concat_3_values1_0"), val = tensor([0])]; tensor concat_3_values3_0 = const()[name = string("concat_3_values3_0"), val = tensor([0])]; int32 concat_3_axis_0 = const()[name = string("concat_3_axis_0"), val = int32(0)]; bool concat_3_interleave_0 = const()[name = string("concat_3_interleave_0"), val = bool(false)]; tensor concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (expand_dims_4, concat_3_values1_0, var_366, concat_3_values3_0))[name = string("concat_3")]; tensor model_model_kv_cache_0_internal_tensor_assign_1_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_1_stride_0, update = rotated_3, x = read_state_0)[name = string("model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_16_write_state")]; tensor coreml_update_state_16 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_16")]; tensor expand_dims_6 = const()[name = string("expand_dims_6"), val = tensor([24])]; tensor expand_dims_7 = const()[name = string("expand_dims_7"), val = tensor([0])]; tensor expand_dims_9 = const()[name = string("expand_dims_9"), val = tensor([0])]; tensor expand_dims_10 = const()[name = string("expand_dims_10"), val = tensor([25])]; int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)]; bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)]; tensor concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (expand_dims_6, expand_dims_7, current_pos, expand_dims_9))[name = string("concat_6")]; tensor concat_7_values1_0 = const()[name = string("concat_7_values1_0"), val = tensor([0])]; tensor concat_7_values3_0 = const()[name = string("concat_7_values3_0"), val = tensor([0])]; int32 concat_7_axis_0 = const()[name = string("concat_7_axis_0"), val = int32(0)]; bool concat_7_interleave_0 = const()[name = string("concat_7_interleave_0"), val = bool(false)]; tensor concat_7 = concat(axis = concat_7_axis_0, interleave = concat_7_interleave_0, values = (expand_dims_10, concat_7_values1_0, var_366, concat_7_values3_0))[name = string("concat_7")]; tensor model_model_kv_cache_0_internal_tensor_assign_2_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_3 = transpose(perm = var_313, x = var_312)[name = string("transpose_50")]; tensor model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_6, begin_mask = model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0, end = concat_7, end_mask = model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_2_stride_0, update = value_states_3, x = coreml_update_state_16)[name = string("model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_17_write_state")]; tensor coreml_update_state_17 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_17")]; tensor var_380_begin_0 = const()[name = string("op_380_begin_0"), val = tensor([8, 0, 0, 0])]; tensor var_380_end_0 = const()[name = string("op_380_end_0"), val = tensor([9, 8, 1546, 64])]; tensor var_380_end_mask_0 = const()[name = string("op_380_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_380_cast_fp16 = slice_by_index(begin = var_380_begin_0, end = var_380_end_0, end_mask = var_380_end_mask_0, x = coreml_update_state_17)[name = string("op_380_cast_fp16")]; tensor K_layer_cache_1_axes_0 = const()[name = string("K_layer_cache_1_axes_0"), val = tensor([0])]; tensor K_layer_cache_1_cast_fp16 = squeeze(axes = K_layer_cache_1_axes_0, x = var_380_cast_fp16)[name = string("K_layer_cache_1_cast_fp16")]; tensor var_382_begin_0 = const()[name = string("op_382_begin_0"), val = tensor([24, 0, 0, 0])]; tensor var_382_end_0 = const()[name = string("op_382_end_0"), val = tensor([25, 8, 1546, 64])]; tensor var_382_end_mask_0 = const()[name = string("op_382_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_382_cast_fp16 = slice_by_index(begin = var_382_begin_0, end = var_382_end_0, end_mask = var_382_end_mask_0, x = coreml_update_state_17)[name = string("op_382_cast_fp16")]; tensor V_layer_cache_1_axes_0 = const()[name = string("V_layer_cache_1_axes_0"), val = tensor([0])]; tensor V_layer_cache_1_cast_fp16 = squeeze(axes = V_layer_cache_1_axes_0, x = var_382_cast_fp16)[name = string("V_layer_cache_1_cast_fp16")]; tensor x_11_axes_0 = const()[name = string("x_11_axes_0"), val = tensor([1])]; tensor x_11_cast_fp16 = expand_dims(axes = x_11_axes_0, x = K_layer_cache_1_cast_fp16)[name = string("x_11_cast_fp16")]; tensor var_391 = const()[name = string("op_391"), val = tensor([1, 4, 1, 1])]; tensor x_13_cast_fp16 = tile(reps = var_391, x = x_11_cast_fp16)[name = string("x_13_cast_fp16")]; tensor var_395 = const()[name = string("op_395"), val = tensor([1, -1, 1546, 64])]; tensor var_396_cast_fp16 = reshape(shape = var_395, x = x_13_cast_fp16)[name = string("op_396_cast_fp16")]; tensor x_17_axes_0 = const()[name = string("x_17_axes_0"), val = tensor([1])]; tensor x_17_cast_fp16 = expand_dims(axes = x_17_axes_0, x = V_layer_cache_1_cast_fp16)[name = string("x_17_cast_fp16")]; tensor var_398 = const()[name = string("op_398"), val = tensor([1, 4, 1, 1])]; tensor x_19_cast_fp16 = tile(reps = var_398, x = x_17_cast_fp16)[name = string("x_19_cast_fp16")]; bool var_405_transpose_x_0 = const()[name = string("op_405_transpose_x_0"), val = bool(false)]; bool var_405_transpose_y_0 = const()[name = string("op_405_transpose_y_0"), val = bool(true)]; tensor var_405_cast_fp16 = matmul(transpose_x = var_405_transpose_x_0, transpose_y = var_405_transpose_y_0, x = rotated_1, y = var_396_cast_fp16)[name = string("op_405_cast_fp16")]; fp16 var_406_to_fp16 = const()[name = string("op_406_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_1_cast_fp16 = mul(x = var_405_cast_fp16, y = var_406_to_fp16)[name = string("attn_weights_1_cast_fp16")]; tensor x_21_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = causal_mask)[name = string("x_21_cast_fp16")]; tensor reduce_max_0_axes_0 = const()[name = string("reduce_max_0_axes_0"), val = tensor([-1])]; bool reduce_max_0_keep_dims_0 = const()[name = string("reduce_max_0_keep_dims_0"), val = bool(true)]; tensor reduce_max_0_cast_fp16 = reduce_max(axes = reduce_max_0_axes_0, keep_dims = reduce_max_0_keep_dims_0, x = x_21_cast_fp16)[name = string("reduce_max_0_cast_fp16")]; tensor x_23_cast_fp16 = sub(x = x_21_cast_fp16, y = reduce_max_0_cast_fp16)[name = string("x_23_cast_fp16")]; tensor exp_x_1_cast_fp16 = exp(x = x_23_cast_fp16)[name = string("exp_x_1_cast_fp16")]; tensor var_417_axes_0 = const()[name = string("op_417_axes_0"), val = tensor([-1])]; bool var_417_keep_dims_0 = const()[name = string("op_417_keep_dims_0"), val = bool(true)]; tensor var_417_cast_fp16 = reduce_sum(axes = var_417_axes_0, keep_dims = var_417_keep_dims_0, x = exp_x_1_cast_fp16)[name = string("op_417_cast_fp16")]; tensor var_418_cast_fp16 = real_div(x = exp_x_1_cast_fp16, y = var_417_cast_fp16)[name = string("op_418_cast_fp16")]; tensor concat_12 = const()[name = string("concat_12"), val = tensor([32, 64, 1546])]; tensor reshape_0_cast_fp16 = reshape(shape = concat_12, x = var_418_cast_fp16)[name = string("reshape_0_cast_fp16")]; tensor concat_13 = const()[name = string("concat_13"), val = tensor([32, 1546, 64])]; tensor reshape_1_cast_fp16 = reshape(shape = concat_13, x = x_19_cast_fp16)[name = string("reshape_1_cast_fp16")]; bool matmul_0_transpose_x_0 = const()[name = string("matmul_0_transpose_x_0"), val = bool(false)]; bool matmul_0_transpose_y_0 = const()[name = string("matmul_0_transpose_y_0"), val = bool(false)]; tensor matmul_0_cast_fp16 = matmul(transpose_x = matmul_0_transpose_x_0, transpose_y = matmul_0_transpose_y_0, x = reshape_0_cast_fp16, y = reshape_1_cast_fp16)[name = string("matmul_0_cast_fp16")]; tensor concat_17 = const()[name = string("concat_17"), val = tensor([1, 32, 64, 64])]; tensor reshape_2_cast_fp16 = reshape(shape = concat_17, x = matmul_0_cast_fp16)[name = string("reshape_2_cast_fp16")]; tensor var_421_perm_0 = const()[name = string("op_421_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_423 = const()[name = string("op_423"), val = tensor([1, 64, 2048])]; tensor var_421_cast_fp16 = transpose(perm = var_421_perm_0, x = reshape_2_cast_fp16)[name = string("transpose_49")]; tensor input_5_cast_fp16 = reshape(shape = var_423, x = var_421_cast_fp16)[name = string("input_5_cast_fp16")]; tensor model_model_layers_8_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(260745472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262842688))))[name = string("model_model_layers_8_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262850944)))]; tensor linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_8_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_5_cast_fp16)[name = string("linear_0_cast_fp16")]; tensor hidden_states_5_cast_fp16 = add(x = hidden_states, y = linear_0_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; tensor mean_3_axes_0 = const()[name = string("mean_3_axes_0"), val = tensor([-1])]; bool mean_3_keep_dims_0 = const()[name = string("mean_3_keep_dims_0"), val = bool(true)]; tensor mean_3_cast_fp16 = reduce_mean(axes = mean_3_axes_0, keep_dims = mean_3_keep_dims_0, x = hidden_states_5_cast_fp16)[name = string("mean_3_cast_fp16")]; tensor input_7_cast_fp16 = sub(x = hidden_states_5_cast_fp16, y = mean_3_cast_fp16)[name = string("input_7_cast_fp16")]; tensor var_434_axes_0 = const()[name = string("op_434_axes_0"), val = tensor([-1])]; tensor model_model_layers_8_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_8_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262855104)))]; tensor var_434_cast_fp16 = layer_norm(axes = var_434_axes_0, epsilon = var_52_to_fp16, gamma = model_model_layers_8_post_attention_layernorm_weight_to_fp16, x = input_7_cast_fp16)[name = string("op_434_cast_fp16")]; tensor var_441 = const()[name = string("op_441"), val = tensor([0, 2, 1])]; tensor input_9_axes_0 = const()[name = string("input_9_axes_0"), val = tensor([2])]; tensor var_442 = transpose(perm = var_441, x = var_434_cast_fp16)[name = string("transpose_48")]; tensor input_9 = expand_dims(axes = input_9_axes_0, x = var_442)[name = string("input_9")]; string input_11_pad_type_0 = const()[name = string("input_11_pad_type_0"), val = string("valid")]; tensor input_11_strides_0 = const()[name = string("input_11_strides_0"), val = tensor([1, 1])]; tensor input_11_pad_0 = const()[name = string("input_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_11_dilations_0 = const()[name = string("input_11_dilations_0"), val = tensor([1, 1])]; int32 input_11_groups_0 = const()[name = string("input_11_groups_0"), val = int32(1)]; tensor input_11 = conv(dilations = input_11_dilations_0, groups = input_11_groups_0, pad = input_11_pad_0, pad_type = input_11_pad_type_0, strides = input_11_strides_0, weight = model_model_layers_8_mlp_gate_proj_weight_palettized, x = input_9)[name = string("input_11")]; string up_states_1_pad_type_0 = const()[name = string("up_states_1_pad_type_0"), val = string("valid")]; tensor up_states_1_strides_0 = const()[name = string("up_states_1_strides_0"), val = tensor([1, 1])]; tensor up_states_1_pad_0 = const()[name = string("up_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_1_dilations_0 = const()[name = string("up_states_1_dilations_0"), val = tensor([1, 1])]; int32 up_states_1_groups_0 = const()[name = string("up_states_1_groups_0"), val = int32(1)]; tensor up_states_1 = conv(dilations = up_states_1_dilations_0, groups = up_states_1_groups_0, pad = up_states_1_pad_0, pad_type = up_states_1_pad_type_0, strides = up_states_1_strides_0, weight = model_model_layers_8_mlp_up_proj_weight_palettized, x = input_9)[name = string("up_states_1")]; tensor gate_states_1 = silu(x = input_11)[name = string("gate_states_1")]; tensor input_13 = mul(x = gate_states_1, y = up_states_1)[name = string("input_13")]; string hidden_states_7_pad_type_0 = const()[name = string("hidden_states_7_pad_type_0"), val = string("valid")]; tensor hidden_states_7_strides_0 = const()[name = string("hidden_states_7_strides_0"), val = tensor([1, 1])]; tensor hidden_states_7_pad_0 = const()[name = string("hidden_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_7_dilations_0 = const()[name = string("hidden_states_7_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_7_groups_0 = const()[name = string("hidden_states_7_groups_0"), val = int32(1)]; tensor hidden_states_7 = conv(dilations = hidden_states_7_dilations_0, groups = hidden_states_7_groups_0, pad = hidden_states_7_pad_0, pad_type = hidden_states_7_pad_type_0, strides = hidden_states_7_strides_0, weight = model_model_layers_8_mlp_down_proj_weight_palettized, x = input_13)[name = string("hidden_states_7")]; tensor var_464_axes_0 = const()[name = string("op_464_axes_0"), val = tensor([2])]; tensor var_464 = squeeze(axes = var_464_axes_0, x = hidden_states_7)[name = string("op_464")]; tensor var_465 = const()[name = string("op_465"), val = tensor([0, 2, 1])]; tensor var_466 = transpose(perm = var_465, x = var_464)[name = string("transpose_47")]; tensor hidden_states_9_cast_fp16 = add(x = hidden_states_5_cast_fp16, y = var_466)[name = string("hidden_states_9_cast_fp16")]; tensor mean_5_axes_0 = const()[name = string("mean_5_axes_0"), val = tensor([-1])]; bool mean_5_keep_dims_0 = const()[name = string("mean_5_keep_dims_0"), val = bool(true)]; tensor mean_5_cast_fp16 = reduce_mean(axes = mean_5_axes_0, keep_dims = mean_5_keep_dims_0, x = hidden_states_9_cast_fp16)[name = string("mean_5_cast_fp16")]; tensor input_15_cast_fp16 = sub(x = hidden_states_9_cast_fp16, y = mean_5_cast_fp16)[name = string("input_15_cast_fp16")]; tensor var_474_axes_0 = const()[name = string("op_474_axes_0"), val = tensor([-1])]; tensor model_model_layers_9_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_9_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262859264)))]; tensor var_474_cast_fp16 = layer_norm(axes = var_474_axes_0, epsilon = var_52_to_fp16, gamma = model_model_layers_9_input_layernorm_weight_to_fp16, x = input_15_cast_fp16)[name = string("op_474_cast_fp16")]; tensor var_478 = const()[name = string("op_478"), val = tensor([0, 2, 1])]; tensor var_480_axes_0 = const()[name = string("op_480_axes_0"), val = tensor([2])]; tensor var_479 = transpose(perm = var_478, x = var_474_cast_fp16)[name = string("transpose_46")]; tensor var_480 = expand_dims(axes = var_480_axes_0, x = var_479)[name = string("op_480")]; string query_states_5_pad_type_0 = const()[name = string("query_states_5_pad_type_0"), val = string("valid")]; tensor query_states_5_strides_0 = const()[name = string("query_states_5_strides_0"), val = tensor([1, 1])]; tensor query_states_5_pad_0 = const()[name = string("query_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_5_dilations_0 = const()[name = string("query_states_5_dilations_0"), val = tensor([1, 1])]; int32 query_states_5_groups_0 = const()[name = string("query_states_5_groups_0"), val = int32(1)]; tensor query_states_5 = conv(dilations = query_states_5_dilations_0, groups = query_states_5_groups_0, pad = query_states_5_pad_0, pad_type = query_states_5_pad_type_0, strides = query_states_5_strides_0, weight = model_model_layers_9_self_attn_q_proj_weight_palettized, x = var_480)[name = string("query_states_5")]; string key_states_7_pad_type_0 = const()[name = string("key_states_7_pad_type_0"), val = string("valid")]; tensor key_states_7_strides_0 = const()[name = string("key_states_7_strides_0"), val = tensor([1, 1])]; tensor key_states_7_pad_0 = const()[name = string("key_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_7_dilations_0 = const()[name = string("key_states_7_dilations_0"), val = tensor([1, 1])]; int32 key_states_7_groups_0 = const()[name = string("key_states_7_groups_0"), val = int32(1)]; tensor key_states_7 = conv(dilations = key_states_7_dilations_0, groups = key_states_7_groups_0, pad = key_states_7_pad_0, pad_type = key_states_7_pad_type_0, strides = key_states_7_strides_0, weight = model_model_layers_9_self_attn_k_proj_weight_palettized, x = var_480)[name = string("key_states_7")]; string value_states_7_pad_type_0 = const()[name = string("value_states_7_pad_type_0"), val = string("valid")]; tensor value_states_7_strides_0 = const()[name = string("value_states_7_strides_0"), val = tensor([1, 1])]; tensor value_states_7_pad_0 = const()[name = string("value_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_7_dilations_0 = const()[name = string("value_states_7_dilations_0"), val = tensor([1, 1])]; int32 value_states_7_groups_0 = const()[name = string("value_states_7_groups_0"), val = int32(1)]; tensor value_states_7 = conv(dilations = value_states_7_dilations_0, groups = value_states_7_groups_0, pad = value_states_7_pad_0, pad_type = value_states_7_pad_type_0, strides = value_states_7_strides_0, weight = model_model_layers_9_self_attn_v_proj_weight_palettized, x = var_480)[name = string("value_states_7")]; tensor var_500 = const()[name = string("op_500"), val = tensor([1, 32, 64, 64])]; tensor var_501 = reshape(shape = var_500, x = query_states_5)[name = string("op_501")]; tensor var_502 = const()[name = string("op_502"), val = tensor([0, 1, 3, 2])]; tensor var_504 = const()[name = string("op_504"), val = tensor([1, 8, 64, 64])]; tensor var_505 = reshape(shape = var_504, x = key_states_7)[name = string("op_505")]; tensor var_506 = const()[name = string("op_506"), val = tensor([0, 1, 3, 2])]; tensor var_508 = const()[name = string("op_508"), val = tensor([1, 8, 64, 64])]; tensor var_509 = reshape(shape = var_508, x = value_states_7)[name = string("op_509")]; tensor var_510 = const()[name = string("op_510"), val = tensor([0, 1, 3, 2])]; tensor x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor([1, 32, 64, 32])]; tensor x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_29 = transpose(perm = var_502, x = var_501)[name = string("transpose_45")]; tensor x1_5 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = x_29)[name = string("x1_5")]; tensor x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor([1, 32, 64, 64])]; tensor x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_5 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = x_29)[name = string("x2_5")]; tensor var_528 = mul(x = x1_5, y = cos_7)[name = string("op_528")]; tensor var_529 = mul(x = x2_5, y = sin_7)[name = string("op_529")]; tensor var_530 = sub(x = var_528, y = var_529)[name = string("op_530")]; tensor var_531 = mul(x = x2_5, y = cos_7)[name = string("op_531")]; tensor var_532 = mul(x = x1_5, y = sin_7)[name = string("op_532")]; tensor var_533 = add(x = var_531, y = var_532)[name = string("op_533")]; bool rotated_5_interleave_0 = const()[name = string("rotated_5_interleave_0"), val = bool(false)]; tensor rotated_5 = concat(axis = var_50, interleave = rotated_5_interleave_0, values = (var_530, var_533))[name = string("rotated_5")]; tensor x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor([1, 8, 64, 32])]; tensor x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_33 = transpose(perm = var_506, x = var_505)[name = string("transpose_44")]; tensor x1_7 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = x_33)[name = string("x1_7")]; tensor x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor([1, 8, 64, 64])]; tensor x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_7 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = x_33)[name = string("x2_7")]; tensor var_549 = mul(x = x1_7, y = cos_7)[name = string("op_549")]; tensor var_550 = mul(x = x2_7, y = sin_7)[name = string("op_550")]; tensor var_551 = sub(x = var_549, y = var_550)[name = string("op_551")]; tensor var_552 = mul(x = x2_7, y = cos_7)[name = string("op_552")]; tensor var_553 = mul(x = x1_7, y = sin_7)[name = string("op_553")]; tensor var_554 = add(x = var_552, y = var_553)[name = string("op_554")]; bool rotated_7_interleave_0 = const()[name = string("rotated_7_interleave_0"), val = bool(false)]; tensor rotated_7 = concat(axis = var_50, interleave = rotated_7_interleave_0, values = (var_551, var_554))[name = string("rotated_7")]; tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([9])]; tensor expand_dims_13 = const()[name = string("expand_dims_13"), val = tensor([0])]; tensor expand_dims_15 = const()[name = string("expand_dims_15"), val = tensor([0])]; tensor expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor([10])]; int32 concat_20_axis_0 = const()[name = string("concat_20_axis_0"), val = int32(0)]; bool concat_20_interleave_0 = const()[name = string("concat_20_interleave_0"), val = bool(false)]; tensor concat_20 = concat(axis = concat_20_axis_0, interleave = concat_20_interleave_0, values = (expand_dims_12, expand_dims_13, current_pos, expand_dims_15))[name = string("concat_20")]; tensor concat_21_values1_0 = const()[name = string("concat_21_values1_0"), val = tensor([0])]; tensor concat_21_values3_0 = const()[name = string("concat_21_values3_0"), val = tensor([0])]; int32 concat_21_axis_0 = const()[name = string("concat_21_axis_0"), val = int32(0)]; bool concat_21_interleave_0 = const()[name = string("concat_21_interleave_0"), val = bool(false)]; tensor concat_21 = concat(axis = concat_21_axis_0, interleave = concat_21_interleave_0, values = (expand_dims_16, concat_21_values1_0, var_366, concat_21_values3_0))[name = string("concat_21")]; tensor model_model_kv_cache_0_internal_tensor_assign_3_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_20, begin_mask = model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0, end = concat_21, end_mask = model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_3_stride_0, update = rotated_7, x = coreml_update_state_17)[name = string("model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_18_write_state")]; tensor coreml_update_state_18 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_18")]; tensor expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor([25])]; tensor expand_dims_19 = const()[name = string("expand_dims_19"), val = tensor([0])]; tensor expand_dims_21 = const()[name = string("expand_dims_21"), val = tensor([0])]; tensor expand_dims_22 = const()[name = string("expand_dims_22"), val = tensor([26])]; int32 concat_24_axis_0 = const()[name = string("concat_24_axis_0"), val = int32(0)]; bool concat_24_interleave_0 = const()[name = string("concat_24_interleave_0"), val = bool(false)]; tensor concat_24 = concat(axis = concat_24_axis_0, interleave = concat_24_interleave_0, values = (expand_dims_18, expand_dims_19, current_pos, expand_dims_21))[name = string("concat_24")]; tensor concat_25_values1_0 = const()[name = string("concat_25_values1_0"), val = tensor([0])]; tensor concat_25_values3_0 = const()[name = string("concat_25_values3_0"), val = tensor([0])]; int32 concat_25_axis_0 = const()[name = string("concat_25_axis_0"), val = int32(0)]; bool concat_25_interleave_0 = const()[name = string("concat_25_interleave_0"), val = bool(false)]; tensor concat_25 = concat(axis = concat_25_axis_0, interleave = concat_25_interleave_0, values = (expand_dims_22, concat_25_values1_0, var_366, concat_25_values3_0))[name = string("concat_25")]; tensor model_model_kv_cache_0_internal_tensor_assign_4_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_9 = transpose(perm = var_510, x = var_509)[name = string("transpose_43")]; tensor model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_24, begin_mask = model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0, end = concat_25, end_mask = model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_4_stride_0, update = value_states_9, x = coreml_update_state_18)[name = string("model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_19_write_state")]; tensor coreml_update_state_19 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_19")]; tensor var_577_begin_0 = const()[name = string("op_577_begin_0"), val = tensor([9, 0, 0, 0])]; tensor var_577_end_0 = const()[name = string("op_577_end_0"), val = tensor([10, 8, 1546, 64])]; tensor var_577_end_mask_0 = const()[name = string("op_577_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_577_cast_fp16 = slice_by_index(begin = var_577_begin_0, end = var_577_end_0, end_mask = var_577_end_mask_0, x = coreml_update_state_19)[name = string("op_577_cast_fp16")]; tensor K_layer_cache_3_axes_0 = const()[name = string("K_layer_cache_3_axes_0"), val = tensor([0])]; tensor K_layer_cache_3_cast_fp16 = squeeze(axes = K_layer_cache_3_axes_0, x = var_577_cast_fp16)[name = string("K_layer_cache_3_cast_fp16")]; tensor var_579_begin_0 = const()[name = string("op_579_begin_0"), val = tensor([25, 0, 0, 0])]; tensor var_579_end_0 = const()[name = string("op_579_end_0"), val = tensor([26, 8, 1546, 64])]; tensor var_579_end_mask_0 = const()[name = string("op_579_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_579_cast_fp16 = slice_by_index(begin = var_579_begin_0, end = var_579_end_0, end_mask = var_579_end_mask_0, x = coreml_update_state_19)[name = string("op_579_cast_fp16")]; tensor V_layer_cache_3_axes_0 = const()[name = string("V_layer_cache_3_axes_0"), val = tensor([0])]; tensor V_layer_cache_3_cast_fp16 = squeeze(axes = V_layer_cache_3_axes_0, x = var_579_cast_fp16)[name = string("V_layer_cache_3_cast_fp16")]; tensor x_39_axes_0 = const()[name = string("x_39_axes_0"), val = tensor([1])]; tensor x_39_cast_fp16 = expand_dims(axes = x_39_axes_0, x = K_layer_cache_3_cast_fp16)[name = string("x_39_cast_fp16")]; tensor var_588 = const()[name = string("op_588"), val = tensor([1, 4, 1, 1])]; tensor x_41_cast_fp16 = tile(reps = var_588, x = x_39_cast_fp16)[name = string("x_41_cast_fp16")]; tensor var_592 = const()[name = string("op_592"), val = tensor([1, -1, 1546, 64])]; tensor var_593_cast_fp16 = reshape(shape = var_592, x = x_41_cast_fp16)[name = string("op_593_cast_fp16")]; tensor x_45_axes_0 = const()[name = string("x_45_axes_0"), val = tensor([1])]; tensor x_45_cast_fp16 = expand_dims(axes = x_45_axes_0, x = V_layer_cache_3_cast_fp16)[name = string("x_45_cast_fp16")]; tensor var_595 = const()[name = string("op_595"), val = tensor([1, 4, 1, 1])]; tensor x_47_cast_fp16 = tile(reps = var_595, x = x_45_cast_fp16)[name = string("x_47_cast_fp16")]; bool var_602_transpose_x_0 = const()[name = string("op_602_transpose_x_0"), val = bool(false)]; bool var_602_transpose_y_0 = const()[name = string("op_602_transpose_y_0"), val = bool(true)]; tensor var_602_cast_fp16 = matmul(transpose_x = var_602_transpose_x_0, transpose_y = var_602_transpose_y_0, x = rotated_5, y = var_593_cast_fp16)[name = string("op_602_cast_fp16")]; fp16 var_603_to_fp16 = const()[name = string("op_603_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_3_cast_fp16 = mul(x = var_602_cast_fp16, y = var_603_to_fp16)[name = string("attn_weights_3_cast_fp16")]; tensor x_49_cast_fp16 = add(x = attn_weights_3_cast_fp16, y = causal_mask)[name = string("x_49_cast_fp16")]; tensor reduce_max_1_axes_0 = const()[name = string("reduce_max_1_axes_0"), val = tensor([-1])]; bool reduce_max_1_keep_dims_0 = const()[name = string("reduce_max_1_keep_dims_0"), val = bool(true)]; tensor reduce_max_1_cast_fp16 = reduce_max(axes = reduce_max_1_axes_0, keep_dims = reduce_max_1_keep_dims_0, x = x_49_cast_fp16)[name = string("reduce_max_1_cast_fp16")]; tensor x_51_cast_fp16 = sub(x = x_49_cast_fp16, y = reduce_max_1_cast_fp16)[name = string("x_51_cast_fp16")]; tensor exp_x_3_cast_fp16 = exp(x = x_51_cast_fp16)[name = string("exp_x_3_cast_fp16")]; tensor var_614_axes_0 = const()[name = string("op_614_axes_0"), val = tensor([-1])]; bool var_614_keep_dims_0 = const()[name = string("op_614_keep_dims_0"), val = bool(true)]; tensor var_614_cast_fp16 = reduce_sum(axes = var_614_axes_0, keep_dims = var_614_keep_dims_0, x = exp_x_3_cast_fp16)[name = string("op_614_cast_fp16")]; tensor var_615_cast_fp16 = real_div(x = exp_x_3_cast_fp16, y = var_614_cast_fp16)[name = string("op_615_cast_fp16")]; tensor concat_30 = const()[name = string("concat_30"), val = tensor([32, 64, 1546])]; tensor reshape_3_cast_fp16 = reshape(shape = concat_30, x = var_615_cast_fp16)[name = string("reshape_3_cast_fp16")]; tensor concat_31 = const()[name = string("concat_31"), val = tensor([32, 1546, 64])]; tensor reshape_4_cast_fp16 = reshape(shape = concat_31, x = x_47_cast_fp16)[name = string("reshape_4_cast_fp16")]; bool matmul_1_transpose_x_0 = const()[name = string("matmul_1_transpose_x_0"), val = bool(false)]; bool matmul_1_transpose_y_0 = const()[name = string("matmul_1_transpose_y_0"), val = bool(false)]; tensor matmul_1_cast_fp16 = matmul(transpose_x = matmul_1_transpose_x_0, transpose_y = matmul_1_transpose_y_0, x = reshape_3_cast_fp16, y = reshape_4_cast_fp16)[name = string("matmul_1_cast_fp16")]; tensor concat_35 = const()[name = string("concat_35"), val = tensor([1, 32, 64, 64])]; tensor reshape_5_cast_fp16 = reshape(shape = concat_35, x = matmul_1_cast_fp16)[name = string("reshape_5_cast_fp16")]; tensor var_618_perm_0 = const()[name = string("op_618_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_620 = const()[name = string("op_620"), val = tensor([1, 64, 2048])]; tensor var_618_cast_fp16 = transpose(perm = var_618_perm_0, x = reshape_5_cast_fp16)[name = string("transpose_42")]; tensor input_19_cast_fp16 = reshape(shape = var_620, x = var_618_cast_fp16)[name = string("input_19_cast_fp16")]; tensor model_model_layers_9_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262863424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(264960640))))[name = string("model_model_layers_9_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_1_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_9_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_19_cast_fp16)[name = string("linear_1_cast_fp16")]; tensor hidden_states_13_cast_fp16 = add(x = hidden_states_9_cast_fp16, y = linear_1_cast_fp16)[name = string("hidden_states_13_cast_fp16")]; tensor mean_7_axes_0 = const()[name = string("mean_7_axes_0"), val = tensor([-1])]; bool mean_7_keep_dims_0 = const()[name = string("mean_7_keep_dims_0"), val = bool(true)]; tensor mean_7_cast_fp16 = reduce_mean(axes = mean_7_axes_0, keep_dims = mean_7_keep_dims_0, x = hidden_states_13_cast_fp16)[name = string("mean_7_cast_fp16")]; tensor input_21_cast_fp16 = sub(x = hidden_states_13_cast_fp16, y = mean_7_cast_fp16)[name = string("input_21_cast_fp16")]; tensor var_631_axes_0 = const()[name = string("op_631_axes_0"), val = tensor([-1])]; tensor model_model_layers_9_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_9_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(264968896)))]; tensor var_631_cast_fp16 = layer_norm(axes = var_631_axes_0, epsilon = var_52_to_fp16, gamma = model_model_layers_9_post_attention_layernorm_weight_to_fp16, x = input_21_cast_fp16)[name = string("op_631_cast_fp16")]; tensor var_638 = const()[name = string("op_638"), val = tensor([0, 2, 1])]; tensor input_23_axes_0 = const()[name = string("input_23_axes_0"), val = tensor([2])]; tensor var_639 = transpose(perm = var_638, x = var_631_cast_fp16)[name = string("transpose_41")]; tensor input_23 = expand_dims(axes = input_23_axes_0, x = var_639)[name = string("input_23")]; string input_25_pad_type_0 = const()[name = string("input_25_pad_type_0"), val = string("valid")]; tensor input_25_strides_0 = const()[name = string("input_25_strides_0"), val = tensor([1, 1])]; tensor input_25_pad_0 = const()[name = string("input_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_25_dilations_0 = const()[name = string("input_25_dilations_0"), val = tensor([1, 1])]; int32 input_25_groups_0 = const()[name = string("input_25_groups_0"), val = int32(1)]; tensor input_25 = conv(dilations = input_25_dilations_0, groups = input_25_groups_0, pad = input_25_pad_0, pad_type = input_25_pad_type_0, strides = input_25_strides_0, weight = model_model_layers_9_mlp_gate_proj_weight_palettized, x = input_23)[name = string("input_25")]; string up_states_3_pad_type_0 = const()[name = string("up_states_3_pad_type_0"), val = string("valid")]; tensor up_states_3_strides_0 = const()[name = string("up_states_3_strides_0"), val = tensor([1, 1])]; tensor up_states_3_pad_0 = const()[name = string("up_states_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_3_dilations_0 = const()[name = string("up_states_3_dilations_0"), val = tensor([1, 1])]; int32 up_states_3_groups_0 = const()[name = string("up_states_3_groups_0"), val = int32(1)]; tensor up_states_3 = conv(dilations = up_states_3_dilations_0, groups = up_states_3_groups_0, pad = up_states_3_pad_0, pad_type = up_states_3_pad_type_0, strides = up_states_3_strides_0, weight = model_model_layers_9_mlp_up_proj_weight_palettized, x = input_23)[name = string("up_states_3")]; tensor gate_states_3 = silu(x = input_25)[name = string("gate_states_3")]; tensor input_27 = mul(x = gate_states_3, y = up_states_3)[name = string("input_27")]; string hidden_states_15_pad_type_0 = const()[name = string("hidden_states_15_pad_type_0"), val = string("valid")]; tensor hidden_states_15_strides_0 = const()[name = string("hidden_states_15_strides_0"), val = tensor([1, 1])]; tensor hidden_states_15_pad_0 = const()[name = string("hidden_states_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_15_dilations_0 = const()[name = string("hidden_states_15_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_15_groups_0 = const()[name = string("hidden_states_15_groups_0"), val = int32(1)]; tensor hidden_states_15 = conv(dilations = hidden_states_15_dilations_0, groups = hidden_states_15_groups_0, pad = hidden_states_15_pad_0, pad_type = hidden_states_15_pad_type_0, strides = hidden_states_15_strides_0, weight = model_model_layers_9_mlp_down_proj_weight_palettized, x = input_27)[name = string("hidden_states_15")]; tensor var_661_axes_0 = const()[name = string("op_661_axes_0"), val = tensor([2])]; tensor var_661 = squeeze(axes = var_661_axes_0, x = hidden_states_15)[name = string("op_661")]; tensor var_662 = const()[name = string("op_662"), val = tensor([0, 2, 1])]; tensor var_663 = transpose(perm = var_662, x = var_661)[name = string("transpose_40")]; tensor hidden_states_17_cast_fp16 = add(x = hidden_states_13_cast_fp16, y = var_663)[name = string("hidden_states_17_cast_fp16")]; tensor mean_9_axes_0 = const()[name = string("mean_9_axes_0"), val = tensor([-1])]; bool mean_9_keep_dims_0 = const()[name = string("mean_9_keep_dims_0"), val = bool(true)]; tensor mean_9_cast_fp16 = reduce_mean(axes = mean_9_axes_0, keep_dims = mean_9_keep_dims_0, x = hidden_states_17_cast_fp16)[name = string("mean_9_cast_fp16")]; tensor input_29_cast_fp16 = sub(x = hidden_states_17_cast_fp16, y = mean_9_cast_fp16)[name = string("input_29_cast_fp16")]; tensor var_671_axes_0 = const()[name = string("op_671_axes_0"), val = tensor([-1])]; tensor model_model_layers_10_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_10_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(264973056)))]; tensor var_671_cast_fp16 = layer_norm(axes = var_671_axes_0, epsilon = var_52_to_fp16, gamma = model_model_layers_10_input_layernorm_weight_to_fp16, x = input_29_cast_fp16)[name = string("op_671_cast_fp16")]; tensor var_675 = const()[name = string("op_675"), val = tensor([0, 2, 1])]; tensor var_677_axes_0 = const()[name = string("op_677_axes_0"), val = tensor([2])]; tensor var_676 = transpose(perm = var_675, x = var_671_cast_fp16)[name = string("transpose_39")]; tensor var_677 = expand_dims(axes = var_677_axes_0, x = var_676)[name = string("op_677")]; string query_states_9_pad_type_0 = const()[name = string("query_states_9_pad_type_0"), val = string("valid")]; tensor query_states_9_strides_0 = const()[name = string("query_states_9_strides_0"), val = tensor([1, 1])]; tensor query_states_9_pad_0 = const()[name = string("query_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_9_dilations_0 = const()[name = string("query_states_9_dilations_0"), val = tensor([1, 1])]; int32 query_states_9_groups_0 = const()[name = string("query_states_9_groups_0"), val = int32(1)]; tensor query_states_9 = conv(dilations = query_states_9_dilations_0, groups = query_states_9_groups_0, pad = query_states_9_pad_0, pad_type = query_states_9_pad_type_0, strides = query_states_9_strides_0, weight = model_model_layers_10_self_attn_q_proj_weight_palettized, x = var_677)[name = string("query_states_9")]; string key_states_13_pad_type_0 = const()[name = string("key_states_13_pad_type_0"), val = string("valid")]; tensor key_states_13_strides_0 = const()[name = string("key_states_13_strides_0"), val = tensor([1, 1])]; tensor key_states_13_pad_0 = const()[name = string("key_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_13_dilations_0 = const()[name = string("key_states_13_dilations_0"), val = tensor([1, 1])]; int32 key_states_13_groups_0 = const()[name = string("key_states_13_groups_0"), val = int32(1)]; tensor key_states_13 = conv(dilations = key_states_13_dilations_0, groups = key_states_13_groups_0, pad = key_states_13_pad_0, pad_type = key_states_13_pad_type_0, strides = key_states_13_strides_0, weight = model_model_layers_10_self_attn_k_proj_weight_palettized, x = var_677)[name = string("key_states_13")]; string value_states_13_pad_type_0 = const()[name = string("value_states_13_pad_type_0"), val = string("valid")]; tensor value_states_13_strides_0 = const()[name = string("value_states_13_strides_0"), val = tensor([1, 1])]; tensor value_states_13_pad_0 = const()[name = string("value_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_13_dilations_0 = const()[name = string("value_states_13_dilations_0"), val = tensor([1, 1])]; int32 value_states_13_groups_0 = const()[name = string("value_states_13_groups_0"), val = int32(1)]; tensor value_states_13 = conv(dilations = value_states_13_dilations_0, groups = value_states_13_groups_0, pad = value_states_13_pad_0, pad_type = value_states_13_pad_type_0, strides = value_states_13_strides_0, weight = model_model_layers_10_self_attn_v_proj_weight_palettized, x = var_677)[name = string("value_states_13")]; tensor var_697 = const()[name = string("op_697"), val = tensor([1, 32, 64, 64])]; tensor var_698 = reshape(shape = var_697, x = query_states_9)[name = string("op_698")]; tensor var_699 = const()[name = string("op_699"), val = tensor([0, 1, 3, 2])]; tensor var_701 = const()[name = string("op_701"), val = tensor([1, 8, 64, 64])]; tensor var_702 = reshape(shape = var_701, x = key_states_13)[name = string("op_702")]; tensor var_703 = const()[name = string("op_703"), val = tensor([0, 1, 3, 2])]; tensor var_705 = const()[name = string("op_705"), val = tensor([1, 8, 64, 64])]; tensor var_706 = reshape(shape = var_705, x = value_states_13)[name = string("op_706")]; tensor var_707 = const()[name = string("op_707"), val = tensor([0, 1, 3, 2])]; tensor x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor([1, 32, 64, 32])]; tensor x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_57 = transpose(perm = var_699, x = var_698)[name = string("transpose_38")]; tensor x1_9 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = x_57)[name = string("x1_9")]; tensor x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor([1, 32, 64, 64])]; tensor x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_9 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = x_57)[name = string("x2_9")]; tensor var_725 = mul(x = x1_9, y = cos_7)[name = string("op_725")]; tensor var_726 = mul(x = x2_9, y = sin_7)[name = string("op_726")]; tensor var_727 = sub(x = var_725, y = var_726)[name = string("op_727")]; tensor var_728 = mul(x = x2_9, y = cos_7)[name = string("op_728")]; tensor var_729 = mul(x = x1_9, y = sin_7)[name = string("op_729")]; tensor var_730 = add(x = var_728, y = var_729)[name = string("op_730")]; bool rotated_9_interleave_0 = const()[name = string("rotated_9_interleave_0"), val = bool(false)]; tensor rotated_9 = concat(axis = var_50, interleave = rotated_9_interleave_0, values = (var_727, var_730))[name = string("rotated_9")]; tensor x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor([1, 8, 64, 32])]; tensor x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_61 = transpose(perm = var_703, x = var_702)[name = string("transpose_37")]; tensor x1_11 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = x_61)[name = string("x1_11")]; tensor x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor([1, 8, 64, 64])]; tensor x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_11 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = x_61)[name = string("x2_11")]; tensor var_746 = mul(x = x1_11, y = cos_7)[name = string("op_746")]; tensor var_747 = mul(x = x2_11, y = sin_7)[name = string("op_747")]; tensor var_748 = sub(x = var_746, y = var_747)[name = string("op_748")]; tensor var_749 = mul(x = x2_11, y = cos_7)[name = string("op_749")]; tensor var_750 = mul(x = x1_11, y = sin_7)[name = string("op_750")]; tensor var_751 = add(x = var_749, y = var_750)[name = string("op_751")]; bool rotated_11_interleave_0 = const()[name = string("rotated_11_interleave_0"), val = bool(false)]; tensor rotated_11 = concat(axis = var_50, interleave = rotated_11_interleave_0, values = (var_748, var_751))[name = string("rotated_11")]; tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([10])]; tensor expand_dims_25 = const()[name = string("expand_dims_25"), val = tensor([0])]; tensor expand_dims_27 = const()[name = string("expand_dims_27"), val = tensor([0])]; tensor expand_dims_28 = const()[name = string("expand_dims_28"), val = tensor([11])]; int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)]; bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)]; tensor concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (expand_dims_24, expand_dims_25, current_pos, expand_dims_27))[name = string("concat_38")]; tensor concat_39_values1_0 = const()[name = string("concat_39_values1_0"), val = tensor([0])]; tensor concat_39_values3_0 = const()[name = string("concat_39_values3_0"), val = tensor([0])]; int32 concat_39_axis_0 = const()[name = string("concat_39_axis_0"), val = int32(0)]; bool concat_39_interleave_0 = const()[name = string("concat_39_interleave_0"), val = bool(false)]; tensor concat_39 = concat(axis = concat_39_axis_0, interleave = concat_39_interleave_0, values = (expand_dims_28, concat_39_values1_0, var_366, concat_39_values3_0))[name = string("concat_39")]; tensor model_model_kv_cache_0_internal_tensor_assign_5_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_38, begin_mask = model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0, end = concat_39, end_mask = model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_5_stride_0, update = rotated_11, x = coreml_update_state_19)[name = string("model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_20_write_state")]; tensor coreml_update_state_20 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_20")]; tensor expand_dims_30 = const()[name = string("expand_dims_30"), val = tensor([26])]; tensor expand_dims_31 = const()[name = string("expand_dims_31"), val = tensor([0])]; tensor expand_dims_33 = const()[name = string("expand_dims_33"), val = tensor([0])]; tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([27])]; int32 concat_42_axis_0 = const()[name = string("concat_42_axis_0"), val = int32(0)]; bool concat_42_interleave_0 = const()[name = string("concat_42_interleave_0"), val = bool(false)]; tensor concat_42 = concat(axis = concat_42_axis_0, interleave = concat_42_interleave_0, values = (expand_dims_30, expand_dims_31, current_pos, expand_dims_33))[name = string("concat_42")]; tensor concat_43_values1_0 = const()[name = string("concat_43_values1_0"), val = tensor([0])]; tensor concat_43_values3_0 = const()[name = string("concat_43_values3_0"), val = tensor([0])]; int32 concat_43_axis_0 = const()[name = string("concat_43_axis_0"), val = int32(0)]; bool concat_43_interleave_0 = const()[name = string("concat_43_interleave_0"), val = bool(false)]; tensor concat_43 = concat(axis = concat_43_axis_0, interleave = concat_43_interleave_0, values = (expand_dims_34, concat_43_values1_0, var_366, concat_43_values3_0))[name = string("concat_43")]; tensor model_model_kv_cache_0_internal_tensor_assign_6_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_15 = transpose(perm = var_707, x = var_706)[name = string("transpose_36")]; tensor model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_42, begin_mask = model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0, end = concat_43, end_mask = model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_6_stride_0, update = value_states_15, x = coreml_update_state_20)[name = string("model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_21_write_state")]; tensor coreml_update_state_21 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_21")]; tensor var_774_begin_0 = const()[name = string("op_774_begin_0"), val = tensor([10, 0, 0, 0])]; tensor var_774_end_0 = const()[name = string("op_774_end_0"), val = tensor([11, 8, 1546, 64])]; tensor var_774_end_mask_0 = const()[name = string("op_774_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_774_cast_fp16 = slice_by_index(begin = var_774_begin_0, end = var_774_end_0, end_mask = var_774_end_mask_0, x = coreml_update_state_21)[name = string("op_774_cast_fp16")]; tensor K_layer_cache_5_axes_0 = const()[name = string("K_layer_cache_5_axes_0"), val = tensor([0])]; tensor K_layer_cache_5_cast_fp16 = squeeze(axes = K_layer_cache_5_axes_0, x = var_774_cast_fp16)[name = string("K_layer_cache_5_cast_fp16")]; tensor var_776_begin_0 = const()[name = string("op_776_begin_0"), val = tensor([26, 0, 0, 0])]; tensor var_776_end_0 = const()[name = string("op_776_end_0"), val = tensor([27, 8, 1546, 64])]; tensor var_776_end_mask_0 = const()[name = string("op_776_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_776_cast_fp16 = slice_by_index(begin = var_776_begin_0, end = var_776_end_0, end_mask = var_776_end_mask_0, x = coreml_update_state_21)[name = string("op_776_cast_fp16")]; tensor V_layer_cache_5_axes_0 = const()[name = string("V_layer_cache_5_axes_0"), val = tensor([0])]; tensor V_layer_cache_5_cast_fp16 = squeeze(axes = V_layer_cache_5_axes_0, x = var_776_cast_fp16)[name = string("V_layer_cache_5_cast_fp16")]; tensor x_67_axes_0 = const()[name = string("x_67_axes_0"), val = tensor([1])]; tensor x_67_cast_fp16 = expand_dims(axes = x_67_axes_0, x = K_layer_cache_5_cast_fp16)[name = string("x_67_cast_fp16")]; tensor var_785 = const()[name = string("op_785"), val = tensor([1, 4, 1, 1])]; tensor x_69_cast_fp16 = tile(reps = var_785, x = x_67_cast_fp16)[name = string("x_69_cast_fp16")]; tensor var_789 = const()[name = string("op_789"), val = tensor([1, -1, 1546, 64])]; tensor var_790_cast_fp16 = reshape(shape = var_789, x = x_69_cast_fp16)[name = string("op_790_cast_fp16")]; tensor x_73_axes_0 = const()[name = string("x_73_axes_0"), val = tensor([1])]; tensor x_73_cast_fp16 = expand_dims(axes = x_73_axes_0, x = V_layer_cache_5_cast_fp16)[name = string("x_73_cast_fp16")]; tensor var_792 = const()[name = string("op_792"), val = tensor([1, 4, 1, 1])]; tensor x_75_cast_fp16 = tile(reps = var_792, x = x_73_cast_fp16)[name = string("x_75_cast_fp16")]; bool var_799_transpose_x_0 = const()[name = string("op_799_transpose_x_0"), val = bool(false)]; bool var_799_transpose_y_0 = const()[name = string("op_799_transpose_y_0"), val = bool(true)]; tensor var_799_cast_fp16 = matmul(transpose_x = var_799_transpose_x_0, transpose_y = var_799_transpose_y_0, x = rotated_9, y = var_790_cast_fp16)[name = string("op_799_cast_fp16")]; fp16 var_800_to_fp16 = const()[name = string("op_800_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_5_cast_fp16 = mul(x = var_799_cast_fp16, y = var_800_to_fp16)[name = string("attn_weights_5_cast_fp16")]; tensor x_77_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = causal_mask)[name = string("x_77_cast_fp16")]; tensor reduce_max_2_axes_0 = const()[name = string("reduce_max_2_axes_0"), val = tensor([-1])]; bool reduce_max_2_keep_dims_0 = const()[name = string("reduce_max_2_keep_dims_0"), val = bool(true)]; tensor reduce_max_2_cast_fp16 = reduce_max(axes = reduce_max_2_axes_0, keep_dims = reduce_max_2_keep_dims_0, x = x_77_cast_fp16)[name = string("reduce_max_2_cast_fp16")]; tensor x_79_cast_fp16 = sub(x = x_77_cast_fp16, y = reduce_max_2_cast_fp16)[name = string("x_79_cast_fp16")]; tensor exp_x_5_cast_fp16 = exp(x = x_79_cast_fp16)[name = string("exp_x_5_cast_fp16")]; tensor var_811_axes_0 = const()[name = string("op_811_axes_0"), val = tensor([-1])]; bool var_811_keep_dims_0 = const()[name = string("op_811_keep_dims_0"), val = bool(true)]; tensor var_811_cast_fp16 = reduce_sum(axes = var_811_axes_0, keep_dims = var_811_keep_dims_0, x = exp_x_5_cast_fp16)[name = string("op_811_cast_fp16")]; tensor var_812_cast_fp16 = real_div(x = exp_x_5_cast_fp16, y = var_811_cast_fp16)[name = string("op_812_cast_fp16")]; tensor concat_48 = const()[name = string("concat_48"), val = tensor([32, 64, 1546])]; tensor reshape_6_cast_fp16 = reshape(shape = concat_48, x = var_812_cast_fp16)[name = string("reshape_6_cast_fp16")]; tensor concat_49 = const()[name = string("concat_49"), val = tensor([32, 1546, 64])]; tensor reshape_7_cast_fp16 = reshape(shape = concat_49, x = x_75_cast_fp16)[name = string("reshape_7_cast_fp16")]; bool matmul_2_transpose_x_0 = const()[name = string("matmul_2_transpose_x_0"), val = bool(false)]; bool matmul_2_transpose_y_0 = const()[name = string("matmul_2_transpose_y_0"), val = bool(false)]; tensor matmul_2_cast_fp16 = matmul(transpose_x = matmul_2_transpose_x_0, transpose_y = matmul_2_transpose_y_0, x = reshape_6_cast_fp16, y = reshape_7_cast_fp16)[name = string("matmul_2_cast_fp16")]; tensor concat_53 = const()[name = string("concat_53"), val = tensor([1, 32, 64, 64])]; tensor reshape_8_cast_fp16 = reshape(shape = concat_53, x = matmul_2_cast_fp16)[name = string("reshape_8_cast_fp16")]; tensor var_815_perm_0 = const()[name = string("op_815_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_817 = const()[name = string("op_817"), val = tensor([1, 64, 2048])]; tensor var_815_cast_fp16 = transpose(perm = var_815_perm_0, x = reshape_8_cast_fp16)[name = string("transpose_35")]; tensor input_33_cast_fp16 = reshape(shape = var_817, x = var_815_cast_fp16)[name = string("input_33_cast_fp16")]; tensor model_model_layers_10_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(264977216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267074432))))[name = string("model_model_layers_10_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_2_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_10_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_33_cast_fp16)[name = string("linear_2_cast_fp16")]; tensor hidden_states_21_cast_fp16 = add(x = hidden_states_17_cast_fp16, y = linear_2_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; tensor mean_11_axes_0 = const()[name = string("mean_11_axes_0"), val = tensor([-1])]; bool mean_11_keep_dims_0 = const()[name = string("mean_11_keep_dims_0"), val = bool(true)]; tensor mean_11_cast_fp16 = reduce_mean(axes = mean_11_axes_0, keep_dims = mean_11_keep_dims_0, x = hidden_states_21_cast_fp16)[name = string("mean_11_cast_fp16")]; tensor input_35_cast_fp16 = sub(x = hidden_states_21_cast_fp16, y = mean_11_cast_fp16)[name = string("input_35_cast_fp16")]; tensor var_828_axes_0 = const()[name = string("op_828_axes_0"), val = tensor([-1])]; tensor model_model_layers_10_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_10_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267082688)))]; tensor var_828_cast_fp16 = layer_norm(axes = var_828_axes_0, epsilon = var_52_to_fp16, gamma = model_model_layers_10_post_attention_layernorm_weight_to_fp16, x = input_35_cast_fp16)[name = string("op_828_cast_fp16")]; tensor var_835 = const()[name = string("op_835"), val = tensor([0, 2, 1])]; tensor input_37_axes_0 = const()[name = string("input_37_axes_0"), val = tensor([2])]; tensor var_836 = transpose(perm = var_835, x = var_828_cast_fp16)[name = string("transpose_34")]; tensor input_37 = expand_dims(axes = input_37_axes_0, x = var_836)[name = string("input_37")]; string input_39_pad_type_0 = const()[name = string("input_39_pad_type_0"), val = string("valid")]; tensor input_39_strides_0 = const()[name = string("input_39_strides_0"), val = tensor([1, 1])]; tensor input_39_pad_0 = const()[name = string("input_39_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_39_dilations_0 = const()[name = string("input_39_dilations_0"), val = tensor([1, 1])]; int32 input_39_groups_0 = const()[name = string("input_39_groups_0"), val = int32(1)]; tensor input_39 = conv(dilations = input_39_dilations_0, groups = input_39_groups_0, pad = input_39_pad_0, pad_type = input_39_pad_type_0, strides = input_39_strides_0, weight = model_model_layers_10_mlp_gate_proj_weight_palettized, x = input_37)[name = string("input_39")]; string up_states_5_pad_type_0 = const()[name = string("up_states_5_pad_type_0"), val = string("valid")]; tensor up_states_5_strides_0 = const()[name = string("up_states_5_strides_0"), val = tensor([1, 1])]; tensor up_states_5_pad_0 = const()[name = string("up_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_5_dilations_0 = const()[name = string("up_states_5_dilations_0"), val = tensor([1, 1])]; int32 up_states_5_groups_0 = const()[name = string("up_states_5_groups_0"), val = int32(1)]; tensor up_states_5 = conv(dilations = up_states_5_dilations_0, groups = up_states_5_groups_0, pad = up_states_5_pad_0, pad_type = up_states_5_pad_type_0, strides = up_states_5_strides_0, weight = model_model_layers_10_mlp_up_proj_weight_palettized, x = input_37)[name = string("up_states_5")]; tensor gate_states_5 = silu(x = input_39)[name = string("gate_states_5")]; tensor input_41 = mul(x = gate_states_5, y = up_states_5)[name = string("input_41")]; string hidden_states_23_pad_type_0 = const()[name = string("hidden_states_23_pad_type_0"), val = string("valid")]; tensor hidden_states_23_strides_0 = const()[name = string("hidden_states_23_strides_0"), val = tensor([1, 1])]; tensor hidden_states_23_pad_0 = const()[name = string("hidden_states_23_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_23_dilations_0 = const()[name = string("hidden_states_23_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_23_groups_0 = const()[name = string("hidden_states_23_groups_0"), val = int32(1)]; tensor hidden_states_23 = conv(dilations = hidden_states_23_dilations_0, groups = hidden_states_23_groups_0, pad = hidden_states_23_pad_0, pad_type = hidden_states_23_pad_type_0, strides = hidden_states_23_strides_0, weight = model_model_layers_10_mlp_down_proj_weight_palettized, x = input_41)[name = string("hidden_states_23")]; tensor var_858_axes_0 = const()[name = string("op_858_axes_0"), val = tensor([2])]; tensor var_858 = squeeze(axes = var_858_axes_0, x = hidden_states_23)[name = string("op_858")]; tensor var_859 = const()[name = string("op_859"), val = tensor([0, 2, 1])]; tensor var_860 = transpose(perm = var_859, x = var_858)[name = string("transpose_33")]; tensor hidden_states_25_cast_fp16 = add(x = hidden_states_21_cast_fp16, y = var_860)[name = string("hidden_states_25_cast_fp16")]; tensor mean_13_axes_0 = const()[name = string("mean_13_axes_0"), val = tensor([-1])]; bool mean_13_keep_dims_0 = const()[name = string("mean_13_keep_dims_0"), val = bool(true)]; tensor mean_13_cast_fp16 = reduce_mean(axes = mean_13_axes_0, keep_dims = mean_13_keep_dims_0, x = hidden_states_25_cast_fp16)[name = string("mean_13_cast_fp16")]; tensor input_43_cast_fp16 = sub(x = hidden_states_25_cast_fp16, y = mean_13_cast_fp16)[name = string("input_43_cast_fp16")]; tensor var_868_axes_0 = const()[name = string("op_868_axes_0"), val = tensor([-1])]; tensor model_model_layers_11_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_11_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267086848)))]; tensor var_868_cast_fp16 = layer_norm(axes = var_868_axes_0, epsilon = var_52_to_fp16, gamma = model_model_layers_11_input_layernorm_weight_to_fp16, x = input_43_cast_fp16)[name = string("op_868_cast_fp16")]; tensor var_872 = const()[name = string("op_872"), val = tensor([0, 2, 1])]; tensor var_874_axes_0 = const()[name = string("op_874_axes_0"), val = tensor([2])]; tensor var_873 = transpose(perm = var_872, x = var_868_cast_fp16)[name = string("transpose_32")]; tensor var_874 = expand_dims(axes = var_874_axes_0, x = var_873)[name = string("op_874")]; string query_states_13_pad_type_0 = const()[name = string("query_states_13_pad_type_0"), val = string("valid")]; tensor query_states_13_strides_0 = const()[name = string("query_states_13_strides_0"), val = tensor([1, 1])]; tensor query_states_13_pad_0 = const()[name = string("query_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_13_dilations_0 = const()[name = string("query_states_13_dilations_0"), val = tensor([1, 1])]; int32 query_states_13_groups_0 = const()[name = string("query_states_13_groups_0"), val = int32(1)]; tensor query_states_13 = conv(dilations = query_states_13_dilations_0, groups = query_states_13_groups_0, pad = query_states_13_pad_0, pad_type = query_states_13_pad_type_0, strides = query_states_13_strides_0, weight = model_model_layers_11_self_attn_q_proj_weight_palettized, x = var_874)[name = string("query_states_13")]; string key_states_19_pad_type_0 = const()[name = string("key_states_19_pad_type_0"), val = string("valid")]; tensor key_states_19_strides_0 = const()[name = string("key_states_19_strides_0"), val = tensor([1, 1])]; tensor key_states_19_pad_0 = const()[name = string("key_states_19_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_19_dilations_0 = const()[name = string("key_states_19_dilations_0"), val = tensor([1, 1])]; int32 key_states_19_groups_0 = const()[name = string("key_states_19_groups_0"), val = int32(1)]; tensor key_states_19 = conv(dilations = key_states_19_dilations_0, groups = key_states_19_groups_0, pad = key_states_19_pad_0, pad_type = key_states_19_pad_type_0, strides = key_states_19_strides_0, weight = model_model_layers_11_self_attn_k_proj_weight_palettized, x = var_874)[name = string("key_states_19")]; string value_states_19_pad_type_0 = const()[name = string("value_states_19_pad_type_0"), val = string("valid")]; tensor value_states_19_strides_0 = const()[name = string("value_states_19_strides_0"), val = tensor([1, 1])]; tensor value_states_19_pad_0 = const()[name = string("value_states_19_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_19_dilations_0 = const()[name = string("value_states_19_dilations_0"), val = tensor([1, 1])]; int32 value_states_19_groups_0 = const()[name = string("value_states_19_groups_0"), val = int32(1)]; tensor value_states_19 = conv(dilations = value_states_19_dilations_0, groups = value_states_19_groups_0, pad = value_states_19_pad_0, pad_type = value_states_19_pad_type_0, strides = value_states_19_strides_0, weight = model_model_layers_11_self_attn_v_proj_weight_palettized, x = var_874)[name = string("value_states_19")]; tensor var_894 = const()[name = string("op_894"), val = tensor([1, 32, 64, 64])]; tensor var_895 = reshape(shape = var_894, x = query_states_13)[name = string("op_895")]; tensor var_896 = const()[name = string("op_896"), val = tensor([0, 1, 3, 2])]; tensor var_898 = const()[name = string("op_898"), val = tensor([1, 8, 64, 64])]; tensor var_899 = reshape(shape = var_898, x = key_states_19)[name = string("op_899")]; tensor var_900 = const()[name = string("op_900"), val = tensor([0, 1, 3, 2])]; tensor var_902 = const()[name = string("op_902"), val = tensor([1, 8, 64, 64])]; tensor var_903 = reshape(shape = var_902, x = value_states_19)[name = string("op_903")]; tensor var_904 = const()[name = string("op_904"), val = tensor([0, 1, 3, 2])]; tensor x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor([1, 32, 64, 32])]; tensor x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_85 = transpose(perm = var_896, x = var_895)[name = string("transpose_31")]; tensor x1_13 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = x_85)[name = string("x1_13")]; tensor x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor([1, 32, 64, 64])]; tensor x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_13 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = x_85)[name = string("x2_13")]; tensor var_922 = mul(x = x1_13, y = cos_7)[name = string("op_922")]; tensor var_923 = mul(x = x2_13, y = sin_7)[name = string("op_923")]; tensor var_924 = sub(x = var_922, y = var_923)[name = string("op_924")]; tensor var_925 = mul(x = x2_13, y = cos_7)[name = string("op_925")]; tensor var_926 = mul(x = x1_13, y = sin_7)[name = string("op_926")]; tensor var_927 = add(x = var_925, y = var_926)[name = string("op_927")]; bool rotated_13_interleave_0 = const()[name = string("rotated_13_interleave_0"), val = bool(false)]; tensor rotated_13 = concat(axis = var_50, interleave = rotated_13_interleave_0, values = (var_924, var_927))[name = string("rotated_13")]; tensor x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor([1, 8, 64, 32])]; tensor x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_89 = transpose(perm = var_900, x = var_899)[name = string("transpose_30")]; tensor x1_15 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = x_89)[name = string("x1_15")]; tensor x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor([1, 8, 64, 64])]; tensor x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_15 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = x_89)[name = string("x2_15")]; tensor var_943 = mul(x = x1_15, y = cos_7)[name = string("op_943")]; tensor var_944 = mul(x = x2_15, y = sin_7)[name = string("op_944")]; tensor var_945 = sub(x = var_943, y = var_944)[name = string("op_945")]; tensor var_946 = mul(x = x2_15, y = cos_7)[name = string("op_946")]; tensor var_947 = mul(x = x1_15, y = sin_7)[name = string("op_947")]; tensor var_948 = add(x = var_946, y = var_947)[name = string("op_948")]; bool rotated_15_interleave_0 = const()[name = string("rotated_15_interleave_0"), val = bool(false)]; tensor rotated_15 = concat(axis = var_50, interleave = rotated_15_interleave_0, values = (var_945, var_948))[name = string("rotated_15")]; tensor expand_dims_36 = const()[name = string("expand_dims_36"), val = tensor([11])]; tensor expand_dims_37 = const()[name = string("expand_dims_37"), val = tensor([0])]; tensor expand_dims_39 = const()[name = string("expand_dims_39"), val = tensor([0])]; tensor expand_dims_40 = const()[name = string("expand_dims_40"), val = tensor([12])]; int32 concat_56_axis_0 = const()[name = string("concat_56_axis_0"), val = int32(0)]; bool concat_56_interleave_0 = const()[name = string("concat_56_interleave_0"), val = bool(false)]; tensor concat_56 = concat(axis = concat_56_axis_0, interleave = concat_56_interleave_0, values = (expand_dims_36, expand_dims_37, current_pos, expand_dims_39))[name = string("concat_56")]; tensor concat_57_values1_0 = const()[name = string("concat_57_values1_0"), val = tensor([0])]; tensor concat_57_values3_0 = const()[name = string("concat_57_values3_0"), val = tensor([0])]; int32 concat_57_axis_0 = const()[name = string("concat_57_axis_0"), val = int32(0)]; bool concat_57_interleave_0 = const()[name = string("concat_57_interleave_0"), val = bool(false)]; tensor concat_57 = concat(axis = concat_57_axis_0, interleave = concat_57_interleave_0, values = (expand_dims_40, concat_57_values1_0, var_366, concat_57_values3_0))[name = string("concat_57")]; tensor model_model_kv_cache_0_internal_tensor_assign_7_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_56, begin_mask = model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0, end = concat_57, end_mask = model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_7_stride_0, update = rotated_15, x = coreml_update_state_21)[name = string("model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_22_write_state")]; tensor coreml_update_state_22 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_22")]; tensor expand_dims_42 = const()[name = string("expand_dims_42"), val = tensor([27])]; tensor expand_dims_43 = const()[name = string("expand_dims_43"), val = tensor([0])]; tensor expand_dims_45 = const()[name = string("expand_dims_45"), val = tensor([0])]; tensor expand_dims_46 = const()[name = string("expand_dims_46"), val = tensor([28])]; int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)]; bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)]; tensor concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (expand_dims_42, expand_dims_43, current_pos, expand_dims_45))[name = string("concat_60")]; tensor concat_61_values1_0 = const()[name = string("concat_61_values1_0"), val = tensor([0])]; tensor concat_61_values3_0 = const()[name = string("concat_61_values3_0"), val = tensor([0])]; int32 concat_61_axis_0 = const()[name = string("concat_61_axis_0"), val = int32(0)]; bool concat_61_interleave_0 = const()[name = string("concat_61_interleave_0"), val = bool(false)]; tensor concat_61 = concat(axis = concat_61_axis_0, interleave = concat_61_interleave_0, values = (expand_dims_46, concat_61_values1_0, var_366, concat_61_values3_0))[name = string("concat_61")]; tensor model_model_kv_cache_0_internal_tensor_assign_8_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_21 = transpose(perm = var_904, x = var_903)[name = string("transpose_29")]; tensor model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_60, begin_mask = model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0, end = concat_61, end_mask = model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_8_stride_0, update = value_states_21, x = coreml_update_state_22)[name = string("model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_23_write_state")]; tensor coreml_update_state_23 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_23")]; tensor var_971_begin_0 = const()[name = string("op_971_begin_0"), val = tensor([11, 0, 0, 0])]; tensor var_971_end_0 = const()[name = string("op_971_end_0"), val = tensor([12, 8, 1546, 64])]; tensor var_971_end_mask_0 = const()[name = string("op_971_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_971_cast_fp16 = slice_by_index(begin = var_971_begin_0, end = var_971_end_0, end_mask = var_971_end_mask_0, x = coreml_update_state_23)[name = string("op_971_cast_fp16")]; tensor K_layer_cache_7_axes_0 = const()[name = string("K_layer_cache_7_axes_0"), val = tensor([0])]; tensor K_layer_cache_7_cast_fp16 = squeeze(axes = K_layer_cache_7_axes_0, x = var_971_cast_fp16)[name = string("K_layer_cache_7_cast_fp16")]; tensor var_973_begin_0 = const()[name = string("op_973_begin_0"), val = tensor([27, 0, 0, 0])]; tensor var_973_end_0 = const()[name = string("op_973_end_0"), val = tensor([28, 8, 1546, 64])]; tensor var_973_end_mask_0 = const()[name = string("op_973_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_973_cast_fp16 = slice_by_index(begin = var_973_begin_0, end = var_973_end_0, end_mask = var_973_end_mask_0, x = coreml_update_state_23)[name = string("op_973_cast_fp16")]; tensor V_layer_cache_7_axes_0 = const()[name = string("V_layer_cache_7_axes_0"), val = tensor([0])]; tensor V_layer_cache_7_cast_fp16 = squeeze(axes = V_layer_cache_7_axes_0, x = var_973_cast_fp16)[name = string("V_layer_cache_7_cast_fp16")]; tensor x_95_axes_0 = const()[name = string("x_95_axes_0"), val = tensor([1])]; tensor x_95_cast_fp16 = expand_dims(axes = x_95_axes_0, x = K_layer_cache_7_cast_fp16)[name = string("x_95_cast_fp16")]; tensor var_982 = const()[name = string("op_982"), val = tensor([1, 4, 1, 1])]; tensor x_97_cast_fp16 = tile(reps = var_982, x = x_95_cast_fp16)[name = string("x_97_cast_fp16")]; tensor var_986 = const()[name = string("op_986"), val = tensor([1, -1, 1546, 64])]; tensor var_987_cast_fp16 = reshape(shape = var_986, x = x_97_cast_fp16)[name = string("op_987_cast_fp16")]; tensor x_101_axes_0 = const()[name = string("x_101_axes_0"), val = tensor([1])]; tensor x_101_cast_fp16 = expand_dims(axes = x_101_axes_0, x = V_layer_cache_7_cast_fp16)[name = string("x_101_cast_fp16")]; tensor var_989 = const()[name = string("op_989"), val = tensor([1, 4, 1, 1])]; tensor x_103_cast_fp16 = tile(reps = var_989, x = x_101_cast_fp16)[name = string("x_103_cast_fp16")]; bool var_996_transpose_x_0 = const()[name = string("op_996_transpose_x_0"), val = bool(false)]; bool var_996_transpose_y_0 = const()[name = string("op_996_transpose_y_0"), val = bool(true)]; tensor var_996_cast_fp16 = matmul(transpose_x = var_996_transpose_x_0, transpose_y = var_996_transpose_y_0, x = rotated_13, y = var_987_cast_fp16)[name = string("op_996_cast_fp16")]; fp16 var_997_to_fp16 = const()[name = string("op_997_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_7_cast_fp16 = mul(x = var_996_cast_fp16, y = var_997_to_fp16)[name = string("attn_weights_7_cast_fp16")]; tensor x_105_cast_fp16 = add(x = attn_weights_7_cast_fp16, y = causal_mask)[name = string("x_105_cast_fp16")]; tensor reduce_max_3_axes_0 = const()[name = string("reduce_max_3_axes_0"), val = tensor([-1])]; bool reduce_max_3_keep_dims_0 = const()[name = string("reduce_max_3_keep_dims_0"), val = bool(true)]; tensor reduce_max_3_cast_fp16 = reduce_max(axes = reduce_max_3_axes_0, keep_dims = reduce_max_3_keep_dims_0, x = x_105_cast_fp16)[name = string("reduce_max_3_cast_fp16")]; tensor x_107_cast_fp16 = sub(x = x_105_cast_fp16, y = reduce_max_3_cast_fp16)[name = string("x_107_cast_fp16")]; tensor exp_x_7_cast_fp16 = exp(x = x_107_cast_fp16)[name = string("exp_x_7_cast_fp16")]; tensor var_1008_axes_0 = const()[name = string("op_1008_axes_0"), val = tensor([-1])]; bool var_1008_keep_dims_0 = const()[name = string("op_1008_keep_dims_0"), val = bool(true)]; tensor var_1008_cast_fp16 = reduce_sum(axes = var_1008_axes_0, keep_dims = var_1008_keep_dims_0, x = exp_x_7_cast_fp16)[name = string("op_1008_cast_fp16")]; tensor var_1009_cast_fp16 = real_div(x = exp_x_7_cast_fp16, y = var_1008_cast_fp16)[name = string("op_1009_cast_fp16")]; tensor concat_66 = const()[name = string("concat_66"), val = tensor([32, 64, 1546])]; tensor reshape_9_cast_fp16 = reshape(shape = concat_66, x = var_1009_cast_fp16)[name = string("reshape_9_cast_fp16")]; tensor concat_67 = const()[name = string("concat_67"), val = tensor([32, 1546, 64])]; tensor reshape_10_cast_fp16 = reshape(shape = concat_67, x = x_103_cast_fp16)[name = string("reshape_10_cast_fp16")]; bool matmul_3_transpose_x_0 = const()[name = string("matmul_3_transpose_x_0"), val = bool(false)]; bool matmul_3_transpose_y_0 = const()[name = string("matmul_3_transpose_y_0"), val = bool(false)]; tensor matmul_3_cast_fp16 = matmul(transpose_x = matmul_3_transpose_x_0, transpose_y = matmul_3_transpose_y_0, x = reshape_9_cast_fp16, y = reshape_10_cast_fp16)[name = string("matmul_3_cast_fp16")]; tensor concat_71 = const()[name = string("concat_71"), val = tensor([1, 32, 64, 64])]; tensor reshape_11_cast_fp16 = reshape(shape = concat_71, x = matmul_3_cast_fp16)[name = string("reshape_11_cast_fp16")]; tensor var_1012_perm_0 = const()[name = string("op_1012_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1014 = const()[name = string("op_1014"), val = tensor([1, 64, 2048])]; tensor var_1012_cast_fp16 = transpose(perm = var_1012_perm_0, x = reshape_11_cast_fp16)[name = string("transpose_28")]; tensor input_47_cast_fp16 = reshape(shape = var_1014, x = var_1012_cast_fp16)[name = string("input_47_cast_fp16")]; tensor model_model_layers_11_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267091008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269188224))))[name = string("model_model_layers_11_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_3_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_11_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_47_cast_fp16)[name = string("linear_3_cast_fp16")]; tensor hidden_states_29_cast_fp16 = add(x = hidden_states_25_cast_fp16, y = linear_3_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; tensor mean_15_axes_0 = const()[name = string("mean_15_axes_0"), val = tensor([-1])]; bool mean_15_keep_dims_0 = const()[name = string("mean_15_keep_dims_0"), val = bool(true)]; tensor mean_15_cast_fp16 = reduce_mean(axes = mean_15_axes_0, keep_dims = mean_15_keep_dims_0, x = hidden_states_29_cast_fp16)[name = string("mean_15_cast_fp16")]; tensor input_49_cast_fp16 = sub(x = hidden_states_29_cast_fp16, y = mean_15_cast_fp16)[name = string("input_49_cast_fp16")]; tensor var_1025_axes_0 = const()[name = string("op_1025_axes_0"), val = tensor([-1])]; tensor model_model_layers_11_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_11_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269196480)))]; tensor var_1025_cast_fp16 = layer_norm(axes = var_1025_axes_0, epsilon = var_52_to_fp16, gamma = model_model_layers_11_post_attention_layernorm_weight_to_fp16, x = input_49_cast_fp16)[name = string("op_1025_cast_fp16")]; tensor var_1032 = const()[name = string("op_1032"), val = tensor([0, 2, 1])]; tensor input_51_axes_0 = const()[name = string("input_51_axes_0"), val = tensor([2])]; tensor var_1033 = transpose(perm = var_1032, x = var_1025_cast_fp16)[name = string("transpose_27")]; tensor input_51 = expand_dims(axes = input_51_axes_0, x = var_1033)[name = string("input_51")]; string input_53_pad_type_0 = const()[name = string("input_53_pad_type_0"), val = string("valid")]; tensor input_53_strides_0 = const()[name = string("input_53_strides_0"), val = tensor([1, 1])]; tensor input_53_pad_0 = const()[name = string("input_53_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_53_dilations_0 = const()[name = string("input_53_dilations_0"), val = tensor([1, 1])]; int32 input_53_groups_0 = const()[name = string("input_53_groups_0"), val = int32(1)]; tensor input_53 = conv(dilations = input_53_dilations_0, groups = input_53_groups_0, pad = input_53_pad_0, pad_type = input_53_pad_type_0, strides = input_53_strides_0, weight = model_model_layers_11_mlp_gate_proj_weight_palettized, x = input_51)[name = string("input_53")]; string up_states_7_pad_type_0 = const()[name = string("up_states_7_pad_type_0"), val = string("valid")]; tensor up_states_7_strides_0 = const()[name = string("up_states_7_strides_0"), val = tensor([1, 1])]; tensor up_states_7_pad_0 = const()[name = string("up_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_7_dilations_0 = const()[name = string("up_states_7_dilations_0"), val = tensor([1, 1])]; int32 up_states_7_groups_0 = const()[name = string("up_states_7_groups_0"), val = int32(1)]; tensor up_states_7 = conv(dilations = up_states_7_dilations_0, groups = up_states_7_groups_0, pad = up_states_7_pad_0, pad_type = up_states_7_pad_type_0, strides = up_states_7_strides_0, weight = model_model_layers_11_mlp_up_proj_weight_palettized, x = input_51)[name = string("up_states_7")]; tensor gate_states_7 = silu(x = input_53)[name = string("gate_states_7")]; tensor input_55 = mul(x = gate_states_7, y = up_states_7)[name = string("input_55")]; string hidden_states_31_pad_type_0 = const()[name = string("hidden_states_31_pad_type_0"), val = string("valid")]; tensor hidden_states_31_strides_0 = const()[name = string("hidden_states_31_strides_0"), val = tensor([1, 1])]; tensor hidden_states_31_pad_0 = const()[name = string("hidden_states_31_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_31_dilations_0 = const()[name = string("hidden_states_31_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_31_groups_0 = const()[name = string("hidden_states_31_groups_0"), val = int32(1)]; tensor hidden_states_31 = conv(dilations = hidden_states_31_dilations_0, groups = hidden_states_31_groups_0, pad = hidden_states_31_pad_0, pad_type = hidden_states_31_pad_type_0, strides = hidden_states_31_strides_0, weight = model_model_layers_11_mlp_down_proj_weight_palettized, x = input_55)[name = string("hidden_states_31")]; tensor var_1055_axes_0 = const()[name = string("op_1055_axes_0"), val = tensor([2])]; tensor var_1055 = squeeze(axes = var_1055_axes_0, x = hidden_states_31)[name = string("op_1055")]; tensor var_1056 = const()[name = string("op_1056"), val = tensor([0, 2, 1])]; tensor var_1057 = transpose(perm = var_1056, x = var_1055)[name = string("transpose_26")]; tensor hidden_states_33_cast_fp16 = add(x = hidden_states_29_cast_fp16, y = var_1057)[name = string("hidden_states_33_cast_fp16")]; tensor mean_17_axes_0 = const()[name = string("mean_17_axes_0"), val = tensor([-1])]; bool mean_17_keep_dims_0 = const()[name = string("mean_17_keep_dims_0"), val = bool(true)]; tensor mean_17_cast_fp16 = reduce_mean(axes = mean_17_axes_0, keep_dims = mean_17_keep_dims_0, x = hidden_states_33_cast_fp16)[name = string("mean_17_cast_fp16")]; tensor input_57_cast_fp16 = sub(x = hidden_states_33_cast_fp16, y = mean_17_cast_fp16)[name = string("input_57_cast_fp16")]; tensor var_1065_axes_0 = const()[name = string("op_1065_axes_0"), val = tensor([-1])]; tensor model_model_layers_12_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_12_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269200640)))]; tensor var_1065_cast_fp16 = layer_norm(axes = var_1065_axes_0, epsilon = var_52_to_fp16, gamma = model_model_layers_12_input_layernorm_weight_to_fp16, x = input_57_cast_fp16)[name = string("op_1065_cast_fp16")]; tensor var_1069 = const()[name = string("op_1069"), val = tensor([0, 2, 1])]; tensor var_1071_axes_0 = const()[name = string("op_1071_axes_0"), val = tensor([2])]; tensor var_1070 = transpose(perm = var_1069, x = var_1065_cast_fp16)[name = string("transpose_25")]; tensor var_1071 = expand_dims(axes = var_1071_axes_0, x = var_1070)[name = string("op_1071")]; string query_states_17_pad_type_0 = const()[name = string("query_states_17_pad_type_0"), val = string("valid")]; tensor query_states_17_strides_0 = const()[name = string("query_states_17_strides_0"), val = tensor([1, 1])]; tensor query_states_17_pad_0 = const()[name = string("query_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_17_dilations_0 = const()[name = string("query_states_17_dilations_0"), val = tensor([1, 1])]; int32 query_states_17_groups_0 = const()[name = string("query_states_17_groups_0"), val = int32(1)]; tensor query_states_17 = conv(dilations = query_states_17_dilations_0, groups = query_states_17_groups_0, pad = query_states_17_pad_0, pad_type = query_states_17_pad_type_0, strides = query_states_17_strides_0, weight = model_model_layers_12_self_attn_q_proj_weight_palettized, x = var_1071)[name = string("query_states_17")]; string key_states_25_pad_type_0 = const()[name = string("key_states_25_pad_type_0"), val = string("valid")]; tensor key_states_25_strides_0 = const()[name = string("key_states_25_strides_0"), val = tensor([1, 1])]; tensor key_states_25_pad_0 = const()[name = string("key_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_25_dilations_0 = const()[name = string("key_states_25_dilations_0"), val = tensor([1, 1])]; int32 key_states_25_groups_0 = const()[name = string("key_states_25_groups_0"), val = int32(1)]; tensor key_states_25 = conv(dilations = key_states_25_dilations_0, groups = key_states_25_groups_0, pad = key_states_25_pad_0, pad_type = key_states_25_pad_type_0, strides = key_states_25_strides_0, weight = model_model_layers_12_self_attn_k_proj_weight_palettized, x = var_1071)[name = string("key_states_25")]; string value_states_25_pad_type_0 = const()[name = string("value_states_25_pad_type_0"), val = string("valid")]; tensor value_states_25_strides_0 = const()[name = string("value_states_25_strides_0"), val = tensor([1, 1])]; tensor value_states_25_pad_0 = const()[name = string("value_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_25_dilations_0 = const()[name = string("value_states_25_dilations_0"), val = tensor([1, 1])]; int32 value_states_25_groups_0 = const()[name = string("value_states_25_groups_0"), val = int32(1)]; tensor value_states_25 = conv(dilations = value_states_25_dilations_0, groups = value_states_25_groups_0, pad = value_states_25_pad_0, pad_type = value_states_25_pad_type_0, strides = value_states_25_strides_0, weight = model_model_layers_12_self_attn_v_proj_weight_palettized, x = var_1071)[name = string("value_states_25")]; tensor var_1091 = const()[name = string("op_1091"), val = tensor([1, 32, 64, 64])]; tensor var_1092 = reshape(shape = var_1091, x = query_states_17)[name = string("op_1092")]; tensor var_1093 = const()[name = string("op_1093"), val = tensor([0, 1, 3, 2])]; tensor var_1095 = const()[name = string("op_1095"), val = tensor([1, 8, 64, 64])]; tensor var_1096 = reshape(shape = var_1095, x = key_states_25)[name = string("op_1096")]; tensor var_1097 = const()[name = string("op_1097"), val = tensor([0, 1, 3, 2])]; tensor var_1099 = const()[name = string("op_1099"), val = tensor([1, 8, 64, 64])]; tensor var_1100 = reshape(shape = var_1099, x = value_states_25)[name = string("op_1100")]; tensor var_1101 = const()[name = string("op_1101"), val = tensor([0, 1, 3, 2])]; tensor x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor([1, 32, 64, 32])]; tensor x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_113 = transpose(perm = var_1093, x = var_1092)[name = string("transpose_24")]; tensor x1_17 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = x_113)[name = string("x1_17")]; tensor x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor([1, 32, 64, 64])]; tensor x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_17 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = x_113)[name = string("x2_17")]; tensor var_1119 = mul(x = x1_17, y = cos_7)[name = string("op_1119")]; tensor var_1120 = mul(x = x2_17, y = sin_7)[name = string("op_1120")]; tensor var_1121 = sub(x = var_1119, y = var_1120)[name = string("op_1121")]; tensor var_1122 = mul(x = x2_17, y = cos_7)[name = string("op_1122")]; tensor var_1123 = mul(x = x1_17, y = sin_7)[name = string("op_1123")]; tensor var_1124 = add(x = var_1122, y = var_1123)[name = string("op_1124")]; bool rotated_17_interleave_0 = const()[name = string("rotated_17_interleave_0"), val = bool(false)]; tensor rotated_17 = concat(axis = var_50, interleave = rotated_17_interleave_0, values = (var_1121, var_1124))[name = string("rotated_17")]; tensor x1_19_begin_0 = const()[name = string("x1_19_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_19_end_0 = const()[name = string("x1_19_end_0"), val = tensor([1, 8, 64, 32])]; tensor x1_19_end_mask_0 = const()[name = string("x1_19_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_117 = transpose(perm = var_1097, x = var_1096)[name = string("transpose_23")]; tensor x1_19 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = x_117)[name = string("x1_19")]; tensor x2_19_begin_0 = const()[name = string("x2_19_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_19_end_0 = const()[name = string("x2_19_end_0"), val = tensor([1, 8, 64, 64])]; tensor x2_19_end_mask_0 = const()[name = string("x2_19_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_19 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = x_117)[name = string("x2_19")]; tensor var_1140 = mul(x = x1_19, y = cos_7)[name = string("op_1140")]; tensor var_1141 = mul(x = x2_19, y = sin_7)[name = string("op_1141")]; tensor var_1142 = sub(x = var_1140, y = var_1141)[name = string("op_1142")]; tensor var_1143 = mul(x = x2_19, y = cos_7)[name = string("op_1143")]; tensor var_1144 = mul(x = x1_19, y = sin_7)[name = string("op_1144")]; tensor var_1145 = add(x = var_1143, y = var_1144)[name = string("op_1145")]; bool rotated_19_interleave_0 = const()[name = string("rotated_19_interleave_0"), val = bool(false)]; tensor rotated_19 = concat(axis = var_50, interleave = rotated_19_interleave_0, values = (var_1142, var_1145))[name = string("rotated_19")]; tensor expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor([12])]; tensor expand_dims_49 = const()[name = string("expand_dims_49"), val = tensor([0])]; tensor expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor([0])]; tensor expand_dims_52 = const()[name = string("expand_dims_52"), val = tensor([13])]; int32 concat_74_axis_0 = const()[name = string("concat_74_axis_0"), val = int32(0)]; bool concat_74_interleave_0 = const()[name = string("concat_74_interleave_0"), val = bool(false)]; tensor concat_74 = concat(axis = concat_74_axis_0, interleave = concat_74_interleave_0, values = (expand_dims_48, expand_dims_49, current_pos, expand_dims_51))[name = string("concat_74")]; tensor concat_75_values1_0 = const()[name = string("concat_75_values1_0"), val = tensor([0])]; tensor concat_75_values3_0 = const()[name = string("concat_75_values3_0"), val = tensor([0])]; int32 concat_75_axis_0 = const()[name = string("concat_75_axis_0"), val = int32(0)]; bool concat_75_interleave_0 = const()[name = string("concat_75_interleave_0"), val = bool(false)]; tensor concat_75 = concat(axis = concat_75_axis_0, interleave = concat_75_interleave_0, values = (expand_dims_52, concat_75_values1_0, var_366, concat_75_values3_0))[name = string("concat_75")]; tensor model_model_kv_cache_0_internal_tensor_assign_9_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_74, begin_mask = model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0, end = concat_75, end_mask = model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_9_stride_0, update = rotated_19, x = coreml_update_state_23)[name = string("model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_24_write_state")]; tensor coreml_update_state_24 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_24")]; tensor expand_dims_54 = const()[name = string("expand_dims_54"), val = tensor([28])]; tensor expand_dims_55 = const()[name = string("expand_dims_55"), val = tensor([0])]; tensor expand_dims_57 = const()[name = string("expand_dims_57"), val = tensor([0])]; tensor expand_dims_58 = const()[name = string("expand_dims_58"), val = tensor([29])]; int32 concat_78_axis_0 = const()[name = string("concat_78_axis_0"), val = int32(0)]; bool concat_78_interleave_0 = const()[name = string("concat_78_interleave_0"), val = bool(false)]; tensor concat_78 = concat(axis = concat_78_axis_0, interleave = concat_78_interleave_0, values = (expand_dims_54, expand_dims_55, current_pos, expand_dims_57))[name = string("concat_78")]; tensor concat_79_values1_0 = const()[name = string("concat_79_values1_0"), val = tensor([0])]; tensor concat_79_values3_0 = const()[name = string("concat_79_values3_0"), val = tensor([0])]; int32 concat_79_axis_0 = const()[name = string("concat_79_axis_0"), val = int32(0)]; bool concat_79_interleave_0 = const()[name = string("concat_79_interleave_0"), val = bool(false)]; tensor concat_79 = concat(axis = concat_79_axis_0, interleave = concat_79_interleave_0, values = (expand_dims_58, concat_79_values1_0, var_366, concat_79_values3_0))[name = string("concat_79")]; tensor model_model_kv_cache_0_internal_tensor_assign_10_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_27 = transpose(perm = var_1101, x = var_1100)[name = string("transpose_22")]; tensor model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_78, begin_mask = model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0, end = concat_79, end_mask = model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_10_stride_0, update = value_states_27, x = coreml_update_state_24)[name = string("model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_25_write_state")]; tensor coreml_update_state_25 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_25")]; tensor var_1168_begin_0 = const()[name = string("op_1168_begin_0"), val = tensor([12, 0, 0, 0])]; tensor var_1168_end_0 = const()[name = string("op_1168_end_0"), val = tensor([13, 8, 1546, 64])]; tensor var_1168_end_mask_0 = const()[name = string("op_1168_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1168_cast_fp16 = slice_by_index(begin = var_1168_begin_0, end = var_1168_end_0, end_mask = var_1168_end_mask_0, x = coreml_update_state_25)[name = string("op_1168_cast_fp16")]; tensor K_layer_cache_9_axes_0 = const()[name = string("K_layer_cache_9_axes_0"), val = tensor([0])]; tensor K_layer_cache_9_cast_fp16 = squeeze(axes = K_layer_cache_9_axes_0, x = var_1168_cast_fp16)[name = string("K_layer_cache_9_cast_fp16")]; tensor var_1170_begin_0 = const()[name = string("op_1170_begin_0"), val = tensor([28, 0, 0, 0])]; tensor var_1170_end_0 = const()[name = string("op_1170_end_0"), val = tensor([29, 8, 1546, 64])]; tensor var_1170_end_mask_0 = const()[name = string("op_1170_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1170_cast_fp16 = slice_by_index(begin = var_1170_begin_0, end = var_1170_end_0, end_mask = var_1170_end_mask_0, x = coreml_update_state_25)[name = string("op_1170_cast_fp16")]; tensor V_layer_cache_9_axes_0 = const()[name = string("V_layer_cache_9_axes_0"), val = tensor([0])]; tensor V_layer_cache_9_cast_fp16 = squeeze(axes = V_layer_cache_9_axes_0, x = var_1170_cast_fp16)[name = string("V_layer_cache_9_cast_fp16")]; tensor x_123_axes_0 = const()[name = string("x_123_axes_0"), val = tensor([1])]; tensor x_123_cast_fp16 = expand_dims(axes = x_123_axes_0, x = K_layer_cache_9_cast_fp16)[name = string("x_123_cast_fp16")]; tensor var_1179 = const()[name = string("op_1179"), val = tensor([1, 4, 1, 1])]; tensor x_125_cast_fp16 = tile(reps = var_1179, x = x_123_cast_fp16)[name = string("x_125_cast_fp16")]; tensor var_1183 = const()[name = string("op_1183"), val = tensor([1, -1, 1546, 64])]; tensor var_1184_cast_fp16 = reshape(shape = var_1183, x = x_125_cast_fp16)[name = string("op_1184_cast_fp16")]; tensor x_129_axes_0 = const()[name = string("x_129_axes_0"), val = tensor([1])]; tensor x_129_cast_fp16 = expand_dims(axes = x_129_axes_0, x = V_layer_cache_9_cast_fp16)[name = string("x_129_cast_fp16")]; tensor var_1186 = const()[name = string("op_1186"), val = tensor([1, 4, 1, 1])]; tensor x_131_cast_fp16 = tile(reps = var_1186, x = x_129_cast_fp16)[name = string("x_131_cast_fp16")]; bool var_1193_transpose_x_0 = const()[name = string("op_1193_transpose_x_0"), val = bool(false)]; bool var_1193_transpose_y_0 = const()[name = string("op_1193_transpose_y_0"), val = bool(true)]; tensor var_1193_cast_fp16 = matmul(transpose_x = var_1193_transpose_x_0, transpose_y = var_1193_transpose_y_0, x = rotated_17, y = var_1184_cast_fp16)[name = string("op_1193_cast_fp16")]; fp16 var_1194_to_fp16 = const()[name = string("op_1194_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_9_cast_fp16 = mul(x = var_1193_cast_fp16, y = var_1194_to_fp16)[name = string("attn_weights_9_cast_fp16")]; tensor x_133_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = causal_mask)[name = string("x_133_cast_fp16")]; tensor reduce_max_4_axes_0 = const()[name = string("reduce_max_4_axes_0"), val = tensor([-1])]; bool reduce_max_4_keep_dims_0 = const()[name = string("reduce_max_4_keep_dims_0"), val = bool(true)]; tensor reduce_max_4_cast_fp16 = reduce_max(axes = reduce_max_4_axes_0, keep_dims = reduce_max_4_keep_dims_0, x = x_133_cast_fp16)[name = string("reduce_max_4_cast_fp16")]; tensor x_135_cast_fp16 = sub(x = x_133_cast_fp16, y = reduce_max_4_cast_fp16)[name = string("x_135_cast_fp16")]; tensor exp_x_9_cast_fp16 = exp(x = x_135_cast_fp16)[name = string("exp_x_9_cast_fp16")]; tensor var_1205_axes_0 = const()[name = string("op_1205_axes_0"), val = tensor([-1])]; bool var_1205_keep_dims_0 = const()[name = string("op_1205_keep_dims_0"), val = bool(true)]; tensor var_1205_cast_fp16 = reduce_sum(axes = var_1205_axes_0, keep_dims = var_1205_keep_dims_0, x = exp_x_9_cast_fp16)[name = string("op_1205_cast_fp16")]; tensor var_1206_cast_fp16 = real_div(x = exp_x_9_cast_fp16, y = var_1205_cast_fp16)[name = string("op_1206_cast_fp16")]; tensor concat_84 = const()[name = string("concat_84"), val = tensor([32, 64, 1546])]; tensor reshape_12_cast_fp16 = reshape(shape = concat_84, x = var_1206_cast_fp16)[name = string("reshape_12_cast_fp16")]; tensor concat_85 = const()[name = string("concat_85"), val = tensor([32, 1546, 64])]; tensor reshape_13_cast_fp16 = reshape(shape = concat_85, x = x_131_cast_fp16)[name = string("reshape_13_cast_fp16")]; bool matmul_4_transpose_x_0 = const()[name = string("matmul_4_transpose_x_0"), val = bool(false)]; bool matmul_4_transpose_y_0 = const()[name = string("matmul_4_transpose_y_0"), val = bool(false)]; tensor matmul_4_cast_fp16 = matmul(transpose_x = matmul_4_transpose_x_0, transpose_y = matmul_4_transpose_y_0, x = reshape_12_cast_fp16, y = reshape_13_cast_fp16)[name = string("matmul_4_cast_fp16")]; tensor concat_89 = const()[name = string("concat_89"), val = tensor([1, 32, 64, 64])]; tensor reshape_14_cast_fp16 = reshape(shape = concat_89, x = matmul_4_cast_fp16)[name = string("reshape_14_cast_fp16")]; tensor var_1209_perm_0 = const()[name = string("op_1209_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1211 = const()[name = string("op_1211"), val = tensor([1, 64, 2048])]; tensor var_1209_cast_fp16 = transpose(perm = var_1209_perm_0, x = reshape_14_cast_fp16)[name = string("transpose_21")]; tensor input_61_cast_fp16 = reshape(shape = var_1211, x = var_1209_cast_fp16)[name = string("input_61_cast_fp16")]; tensor model_model_layers_12_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269204800))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271302016))))[name = string("model_model_layers_12_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_4_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_12_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_61_cast_fp16)[name = string("linear_4_cast_fp16")]; tensor hidden_states_37_cast_fp16 = add(x = hidden_states_33_cast_fp16, y = linear_4_cast_fp16)[name = string("hidden_states_37_cast_fp16")]; tensor mean_19_axes_0 = const()[name = string("mean_19_axes_0"), val = tensor([-1])]; bool mean_19_keep_dims_0 = const()[name = string("mean_19_keep_dims_0"), val = bool(true)]; tensor mean_19_cast_fp16 = reduce_mean(axes = mean_19_axes_0, keep_dims = mean_19_keep_dims_0, x = hidden_states_37_cast_fp16)[name = string("mean_19_cast_fp16")]; tensor input_63_cast_fp16 = sub(x = hidden_states_37_cast_fp16, y = mean_19_cast_fp16)[name = string("input_63_cast_fp16")]; tensor var_1222_axes_0 = const()[name = string("op_1222_axes_0"), val = tensor([-1])]; tensor model_model_layers_12_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_12_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271310272)))]; tensor var_1222_cast_fp16 = layer_norm(axes = var_1222_axes_0, epsilon = var_52_to_fp16, gamma = model_model_layers_12_post_attention_layernorm_weight_to_fp16, x = input_63_cast_fp16)[name = string("op_1222_cast_fp16")]; tensor var_1229 = const()[name = string("op_1229"), val = tensor([0, 2, 1])]; tensor input_65_axes_0 = const()[name = string("input_65_axes_0"), val = tensor([2])]; tensor var_1230 = transpose(perm = var_1229, x = var_1222_cast_fp16)[name = string("transpose_20")]; tensor input_65 = expand_dims(axes = input_65_axes_0, x = var_1230)[name = string("input_65")]; string input_67_pad_type_0 = const()[name = string("input_67_pad_type_0"), val = string("valid")]; tensor input_67_strides_0 = const()[name = string("input_67_strides_0"), val = tensor([1, 1])]; tensor input_67_pad_0 = const()[name = string("input_67_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_67_dilations_0 = const()[name = string("input_67_dilations_0"), val = tensor([1, 1])]; int32 input_67_groups_0 = const()[name = string("input_67_groups_0"), val = int32(1)]; tensor input_67 = conv(dilations = input_67_dilations_0, groups = input_67_groups_0, pad = input_67_pad_0, pad_type = input_67_pad_type_0, strides = input_67_strides_0, weight = model_model_layers_12_mlp_gate_proj_weight_palettized, x = input_65)[name = string("input_67")]; string up_states_9_pad_type_0 = const()[name = string("up_states_9_pad_type_0"), val = string("valid")]; tensor up_states_9_strides_0 = const()[name = string("up_states_9_strides_0"), val = tensor([1, 1])]; tensor up_states_9_pad_0 = const()[name = string("up_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_9_dilations_0 = const()[name = string("up_states_9_dilations_0"), val = tensor([1, 1])]; int32 up_states_9_groups_0 = const()[name = string("up_states_9_groups_0"), val = int32(1)]; tensor up_states_9 = conv(dilations = up_states_9_dilations_0, groups = up_states_9_groups_0, pad = up_states_9_pad_0, pad_type = up_states_9_pad_type_0, strides = up_states_9_strides_0, weight = model_model_layers_12_mlp_up_proj_weight_palettized, x = input_65)[name = string("up_states_9")]; tensor gate_states_9 = silu(x = input_67)[name = string("gate_states_9")]; tensor input_69 = mul(x = gate_states_9, y = up_states_9)[name = string("input_69")]; string hidden_states_39_pad_type_0 = const()[name = string("hidden_states_39_pad_type_0"), val = string("valid")]; tensor hidden_states_39_strides_0 = const()[name = string("hidden_states_39_strides_0"), val = tensor([1, 1])]; tensor hidden_states_39_pad_0 = const()[name = string("hidden_states_39_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_39_dilations_0 = const()[name = string("hidden_states_39_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_39_groups_0 = const()[name = string("hidden_states_39_groups_0"), val = int32(1)]; tensor hidden_states_39 = conv(dilations = hidden_states_39_dilations_0, groups = hidden_states_39_groups_0, pad = hidden_states_39_pad_0, pad_type = hidden_states_39_pad_type_0, strides = hidden_states_39_strides_0, weight = model_model_layers_12_mlp_down_proj_weight_palettized, x = input_69)[name = string("hidden_states_39")]; tensor var_1252_axes_0 = const()[name = string("op_1252_axes_0"), val = tensor([2])]; tensor var_1252 = squeeze(axes = var_1252_axes_0, x = hidden_states_39)[name = string("op_1252")]; tensor var_1253 = const()[name = string("op_1253"), val = tensor([0, 2, 1])]; tensor var_1254 = transpose(perm = var_1253, x = var_1252)[name = string("transpose_19")]; tensor hidden_states_41_cast_fp16 = add(x = hidden_states_37_cast_fp16, y = var_1254)[name = string("hidden_states_41_cast_fp16")]; tensor mean_21_axes_0 = const()[name = string("mean_21_axes_0"), val = tensor([-1])]; bool mean_21_keep_dims_0 = const()[name = string("mean_21_keep_dims_0"), val = bool(true)]; tensor mean_21_cast_fp16 = reduce_mean(axes = mean_21_axes_0, keep_dims = mean_21_keep_dims_0, x = hidden_states_41_cast_fp16)[name = string("mean_21_cast_fp16")]; tensor input_71_cast_fp16 = sub(x = hidden_states_41_cast_fp16, y = mean_21_cast_fp16)[name = string("input_71_cast_fp16")]; tensor var_1262_axes_0 = const()[name = string("op_1262_axes_0"), val = tensor([-1])]; tensor model_model_layers_13_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_13_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271314432)))]; tensor var_1262_cast_fp16 = layer_norm(axes = var_1262_axes_0, epsilon = var_52_to_fp16, gamma = model_model_layers_13_input_layernorm_weight_to_fp16, x = input_71_cast_fp16)[name = string("op_1262_cast_fp16")]; tensor var_1266 = const()[name = string("op_1266"), val = tensor([0, 2, 1])]; tensor var_1268_axes_0 = const()[name = string("op_1268_axes_0"), val = tensor([2])]; tensor var_1267 = transpose(perm = var_1266, x = var_1262_cast_fp16)[name = string("transpose_18")]; tensor var_1268 = expand_dims(axes = var_1268_axes_0, x = var_1267)[name = string("op_1268")]; string query_states_21_pad_type_0 = const()[name = string("query_states_21_pad_type_0"), val = string("valid")]; tensor query_states_21_strides_0 = const()[name = string("query_states_21_strides_0"), val = tensor([1, 1])]; tensor query_states_21_pad_0 = const()[name = string("query_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_21_dilations_0 = const()[name = string("query_states_21_dilations_0"), val = tensor([1, 1])]; int32 query_states_21_groups_0 = const()[name = string("query_states_21_groups_0"), val = int32(1)]; tensor query_states_21 = conv(dilations = query_states_21_dilations_0, groups = query_states_21_groups_0, pad = query_states_21_pad_0, pad_type = query_states_21_pad_type_0, strides = query_states_21_strides_0, weight = model_model_layers_13_self_attn_q_proj_weight_palettized, x = var_1268)[name = string("query_states_21")]; string key_states_31_pad_type_0 = const()[name = string("key_states_31_pad_type_0"), val = string("valid")]; tensor key_states_31_strides_0 = const()[name = string("key_states_31_strides_0"), val = tensor([1, 1])]; tensor key_states_31_pad_0 = const()[name = string("key_states_31_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_31_dilations_0 = const()[name = string("key_states_31_dilations_0"), val = tensor([1, 1])]; int32 key_states_31_groups_0 = const()[name = string("key_states_31_groups_0"), val = int32(1)]; tensor key_states_31 = conv(dilations = key_states_31_dilations_0, groups = key_states_31_groups_0, pad = key_states_31_pad_0, pad_type = key_states_31_pad_type_0, strides = key_states_31_strides_0, weight = model_model_layers_13_self_attn_k_proj_weight_palettized, x = var_1268)[name = string("key_states_31")]; string value_states_31_pad_type_0 = const()[name = string("value_states_31_pad_type_0"), val = string("valid")]; tensor value_states_31_strides_0 = const()[name = string("value_states_31_strides_0"), val = tensor([1, 1])]; tensor value_states_31_pad_0 = const()[name = string("value_states_31_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_31_dilations_0 = const()[name = string("value_states_31_dilations_0"), val = tensor([1, 1])]; int32 value_states_31_groups_0 = const()[name = string("value_states_31_groups_0"), val = int32(1)]; tensor value_states_31 = conv(dilations = value_states_31_dilations_0, groups = value_states_31_groups_0, pad = value_states_31_pad_0, pad_type = value_states_31_pad_type_0, strides = value_states_31_strides_0, weight = model_model_layers_13_self_attn_v_proj_weight_palettized, x = var_1268)[name = string("value_states_31")]; tensor var_1288 = const()[name = string("op_1288"), val = tensor([1, 32, 64, 64])]; tensor var_1289 = reshape(shape = var_1288, x = query_states_21)[name = string("op_1289")]; tensor var_1290 = const()[name = string("op_1290"), val = tensor([0, 1, 3, 2])]; tensor var_1292 = const()[name = string("op_1292"), val = tensor([1, 8, 64, 64])]; tensor var_1293 = reshape(shape = var_1292, x = key_states_31)[name = string("op_1293")]; tensor var_1294 = const()[name = string("op_1294"), val = tensor([0, 1, 3, 2])]; tensor var_1296 = const()[name = string("op_1296"), val = tensor([1, 8, 64, 64])]; tensor var_1297 = reshape(shape = var_1296, x = value_states_31)[name = string("op_1297")]; tensor var_1298 = const()[name = string("op_1298"), val = tensor([0, 1, 3, 2])]; tensor x1_21_begin_0 = const()[name = string("x1_21_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_21_end_0 = const()[name = string("x1_21_end_0"), val = tensor([1, 32, 64, 32])]; tensor x1_21_end_mask_0 = const()[name = string("x1_21_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_141 = transpose(perm = var_1290, x = var_1289)[name = string("transpose_17")]; tensor x1_21 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = x_141)[name = string("x1_21")]; tensor x2_21_begin_0 = const()[name = string("x2_21_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_21_end_0 = const()[name = string("x2_21_end_0"), val = tensor([1, 32, 64, 64])]; tensor x2_21_end_mask_0 = const()[name = string("x2_21_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_21 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = x_141)[name = string("x2_21")]; tensor var_1316 = mul(x = x1_21, y = cos_7)[name = string("op_1316")]; tensor var_1317 = mul(x = x2_21, y = sin_7)[name = string("op_1317")]; tensor var_1318 = sub(x = var_1316, y = var_1317)[name = string("op_1318")]; tensor var_1319 = mul(x = x2_21, y = cos_7)[name = string("op_1319")]; tensor var_1320 = mul(x = x1_21, y = sin_7)[name = string("op_1320")]; tensor var_1321 = add(x = var_1319, y = var_1320)[name = string("op_1321")]; bool rotated_21_interleave_0 = const()[name = string("rotated_21_interleave_0"), val = bool(false)]; tensor rotated_21 = concat(axis = var_50, interleave = rotated_21_interleave_0, values = (var_1318, var_1321))[name = string("rotated_21")]; tensor x1_23_begin_0 = const()[name = string("x1_23_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_23_end_0 = const()[name = string("x1_23_end_0"), val = tensor([1, 8, 64, 32])]; tensor x1_23_end_mask_0 = const()[name = string("x1_23_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_145 = transpose(perm = var_1294, x = var_1293)[name = string("transpose_16")]; tensor x1_23 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = x_145)[name = string("x1_23")]; tensor x2_23_begin_0 = const()[name = string("x2_23_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_23_end_0 = const()[name = string("x2_23_end_0"), val = tensor([1, 8, 64, 64])]; tensor x2_23_end_mask_0 = const()[name = string("x2_23_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_23 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = x_145)[name = string("x2_23")]; tensor var_1337 = mul(x = x1_23, y = cos_7)[name = string("op_1337")]; tensor var_1338 = mul(x = x2_23, y = sin_7)[name = string("op_1338")]; tensor var_1339 = sub(x = var_1337, y = var_1338)[name = string("op_1339")]; tensor var_1340 = mul(x = x2_23, y = cos_7)[name = string("op_1340")]; tensor var_1341 = mul(x = x1_23, y = sin_7)[name = string("op_1341")]; tensor var_1342 = add(x = var_1340, y = var_1341)[name = string("op_1342")]; bool rotated_23_interleave_0 = const()[name = string("rotated_23_interleave_0"), val = bool(false)]; tensor rotated_23 = concat(axis = var_50, interleave = rotated_23_interleave_0, values = (var_1339, var_1342))[name = string("rotated_23")]; tensor expand_dims_60 = const()[name = string("expand_dims_60"), val = tensor([13])]; tensor expand_dims_61 = const()[name = string("expand_dims_61"), val = tensor([0])]; tensor expand_dims_63 = const()[name = string("expand_dims_63"), val = tensor([0])]; tensor expand_dims_64 = const()[name = string("expand_dims_64"), val = tensor([14])]; int32 concat_92_axis_0 = const()[name = string("concat_92_axis_0"), val = int32(0)]; bool concat_92_interleave_0 = const()[name = string("concat_92_interleave_0"), val = bool(false)]; tensor concat_92 = concat(axis = concat_92_axis_0, interleave = concat_92_interleave_0, values = (expand_dims_60, expand_dims_61, current_pos, expand_dims_63))[name = string("concat_92")]; tensor concat_93_values1_0 = const()[name = string("concat_93_values1_0"), val = tensor([0])]; tensor concat_93_values3_0 = const()[name = string("concat_93_values3_0"), val = tensor([0])]; int32 concat_93_axis_0 = const()[name = string("concat_93_axis_0"), val = int32(0)]; bool concat_93_interleave_0 = const()[name = string("concat_93_interleave_0"), val = bool(false)]; tensor concat_93 = concat(axis = concat_93_axis_0, interleave = concat_93_interleave_0, values = (expand_dims_64, concat_93_values1_0, var_366, concat_93_values3_0))[name = string("concat_93")]; tensor model_model_kv_cache_0_internal_tensor_assign_11_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_92, begin_mask = model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0, end = concat_93, end_mask = model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_11_stride_0, update = rotated_23, x = coreml_update_state_25)[name = string("model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_26_write_state")]; tensor coreml_update_state_26 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_26")]; tensor expand_dims_66 = const()[name = string("expand_dims_66"), val = tensor([29])]; tensor expand_dims_67 = const()[name = string("expand_dims_67"), val = tensor([0])]; tensor expand_dims_69 = const()[name = string("expand_dims_69"), val = tensor([0])]; tensor expand_dims_70 = const()[name = string("expand_dims_70"), val = tensor([30])]; int32 concat_96_axis_0 = const()[name = string("concat_96_axis_0"), val = int32(0)]; bool concat_96_interleave_0 = const()[name = string("concat_96_interleave_0"), val = bool(false)]; tensor concat_96 = concat(axis = concat_96_axis_0, interleave = concat_96_interleave_0, values = (expand_dims_66, expand_dims_67, current_pos, expand_dims_69))[name = string("concat_96")]; tensor concat_97_values1_0 = const()[name = string("concat_97_values1_0"), val = tensor([0])]; tensor concat_97_values3_0 = const()[name = string("concat_97_values3_0"), val = tensor([0])]; int32 concat_97_axis_0 = const()[name = string("concat_97_axis_0"), val = int32(0)]; bool concat_97_interleave_0 = const()[name = string("concat_97_interleave_0"), val = bool(false)]; tensor concat_97 = concat(axis = concat_97_axis_0, interleave = concat_97_interleave_0, values = (expand_dims_70, concat_97_values1_0, var_366, concat_97_values3_0))[name = string("concat_97")]; tensor model_model_kv_cache_0_internal_tensor_assign_12_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_33 = transpose(perm = var_1298, x = var_1297)[name = string("transpose_15")]; tensor model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_96, begin_mask = model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0, end = concat_97, end_mask = model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_12_stride_0, update = value_states_33, x = coreml_update_state_26)[name = string("model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_27_write_state")]; tensor coreml_update_state_27 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_27")]; tensor var_1365_begin_0 = const()[name = string("op_1365_begin_0"), val = tensor([13, 0, 0, 0])]; tensor var_1365_end_0 = const()[name = string("op_1365_end_0"), val = tensor([14, 8, 1546, 64])]; tensor var_1365_end_mask_0 = const()[name = string("op_1365_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1365_cast_fp16 = slice_by_index(begin = var_1365_begin_0, end = var_1365_end_0, end_mask = var_1365_end_mask_0, x = coreml_update_state_27)[name = string("op_1365_cast_fp16")]; tensor K_layer_cache_11_axes_0 = const()[name = string("K_layer_cache_11_axes_0"), val = tensor([0])]; tensor K_layer_cache_11_cast_fp16 = squeeze(axes = K_layer_cache_11_axes_0, x = var_1365_cast_fp16)[name = string("K_layer_cache_11_cast_fp16")]; tensor var_1367_begin_0 = const()[name = string("op_1367_begin_0"), val = tensor([29, 0, 0, 0])]; tensor var_1367_end_0 = const()[name = string("op_1367_end_0"), val = tensor([30, 8, 1546, 64])]; tensor var_1367_end_mask_0 = const()[name = string("op_1367_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1367_cast_fp16 = slice_by_index(begin = var_1367_begin_0, end = var_1367_end_0, end_mask = var_1367_end_mask_0, x = coreml_update_state_27)[name = string("op_1367_cast_fp16")]; tensor V_layer_cache_11_axes_0 = const()[name = string("V_layer_cache_11_axes_0"), val = tensor([0])]; tensor V_layer_cache_11_cast_fp16 = squeeze(axes = V_layer_cache_11_axes_0, x = var_1367_cast_fp16)[name = string("V_layer_cache_11_cast_fp16")]; tensor x_151_axes_0 = const()[name = string("x_151_axes_0"), val = tensor([1])]; tensor x_151_cast_fp16 = expand_dims(axes = x_151_axes_0, x = K_layer_cache_11_cast_fp16)[name = string("x_151_cast_fp16")]; tensor var_1376 = const()[name = string("op_1376"), val = tensor([1, 4, 1, 1])]; tensor x_153_cast_fp16 = tile(reps = var_1376, x = x_151_cast_fp16)[name = string("x_153_cast_fp16")]; tensor var_1380 = const()[name = string("op_1380"), val = tensor([1, -1, 1546, 64])]; tensor var_1381_cast_fp16 = reshape(shape = var_1380, x = x_153_cast_fp16)[name = string("op_1381_cast_fp16")]; tensor x_157_axes_0 = const()[name = string("x_157_axes_0"), val = tensor([1])]; tensor x_157_cast_fp16 = expand_dims(axes = x_157_axes_0, x = V_layer_cache_11_cast_fp16)[name = string("x_157_cast_fp16")]; tensor var_1383 = const()[name = string("op_1383"), val = tensor([1, 4, 1, 1])]; tensor x_159_cast_fp16 = tile(reps = var_1383, x = x_157_cast_fp16)[name = string("x_159_cast_fp16")]; bool var_1390_transpose_x_0 = const()[name = string("op_1390_transpose_x_0"), val = bool(false)]; bool var_1390_transpose_y_0 = const()[name = string("op_1390_transpose_y_0"), val = bool(true)]; tensor var_1390_cast_fp16 = matmul(transpose_x = var_1390_transpose_x_0, transpose_y = var_1390_transpose_y_0, x = rotated_21, y = var_1381_cast_fp16)[name = string("op_1390_cast_fp16")]; fp16 var_1391_to_fp16 = const()[name = string("op_1391_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_11_cast_fp16 = mul(x = var_1390_cast_fp16, y = var_1391_to_fp16)[name = string("attn_weights_11_cast_fp16")]; tensor x_161_cast_fp16 = add(x = attn_weights_11_cast_fp16, y = causal_mask)[name = string("x_161_cast_fp16")]; tensor reduce_max_5_axes_0 = const()[name = string("reduce_max_5_axes_0"), val = tensor([-1])]; bool reduce_max_5_keep_dims_0 = const()[name = string("reduce_max_5_keep_dims_0"), val = bool(true)]; tensor reduce_max_5_cast_fp16 = reduce_max(axes = reduce_max_5_axes_0, keep_dims = reduce_max_5_keep_dims_0, x = x_161_cast_fp16)[name = string("reduce_max_5_cast_fp16")]; tensor x_163_cast_fp16 = sub(x = x_161_cast_fp16, y = reduce_max_5_cast_fp16)[name = string("x_163_cast_fp16")]; tensor exp_x_11_cast_fp16 = exp(x = x_163_cast_fp16)[name = string("exp_x_11_cast_fp16")]; tensor var_1402_axes_0 = const()[name = string("op_1402_axes_0"), val = tensor([-1])]; bool var_1402_keep_dims_0 = const()[name = string("op_1402_keep_dims_0"), val = bool(true)]; tensor var_1402_cast_fp16 = reduce_sum(axes = var_1402_axes_0, keep_dims = var_1402_keep_dims_0, x = exp_x_11_cast_fp16)[name = string("op_1402_cast_fp16")]; tensor var_1403_cast_fp16 = real_div(x = exp_x_11_cast_fp16, y = var_1402_cast_fp16)[name = string("op_1403_cast_fp16")]; tensor concat_102 = const()[name = string("concat_102"), val = tensor([32, 64, 1546])]; tensor reshape_15_cast_fp16 = reshape(shape = concat_102, x = var_1403_cast_fp16)[name = string("reshape_15_cast_fp16")]; tensor concat_103 = const()[name = string("concat_103"), val = tensor([32, 1546, 64])]; tensor reshape_16_cast_fp16 = reshape(shape = concat_103, x = x_159_cast_fp16)[name = string("reshape_16_cast_fp16")]; bool matmul_5_transpose_x_0 = const()[name = string("matmul_5_transpose_x_0"), val = bool(false)]; bool matmul_5_transpose_y_0 = const()[name = string("matmul_5_transpose_y_0"), val = bool(false)]; tensor matmul_5_cast_fp16 = matmul(transpose_x = matmul_5_transpose_x_0, transpose_y = matmul_5_transpose_y_0, x = reshape_15_cast_fp16, y = reshape_16_cast_fp16)[name = string("matmul_5_cast_fp16")]; tensor concat_107 = const()[name = string("concat_107"), val = tensor([1, 32, 64, 64])]; tensor reshape_17_cast_fp16 = reshape(shape = concat_107, x = matmul_5_cast_fp16)[name = string("reshape_17_cast_fp16")]; tensor var_1406_perm_0 = const()[name = string("op_1406_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1408 = const()[name = string("op_1408"), val = tensor([1, 64, 2048])]; tensor var_1406_cast_fp16 = transpose(perm = var_1406_perm_0, x = reshape_17_cast_fp16)[name = string("transpose_14")]; tensor input_75_cast_fp16 = reshape(shape = var_1408, x = var_1406_cast_fp16)[name = string("input_75_cast_fp16")]; tensor model_model_layers_13_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271318592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273415808))))[name = string("model_model_layers_13_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_5_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_13_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_75_cast_fp16)[name = string("linear_5_cast_fp16")]; tensor hidden_states_45_cast_fp16 = add(x = hidden_states_41_cast_fp16, y = linear_5_cast_fp16)[name = string("hidden_states_45_cast_fp16")]; tensor mean_23_axes_0 = const()[name = string("mean_23_axes_0"), val = tensor([-1])]; bool mean_23_keep_dims_0 = const()[name = string("mean_23_keep_dims_0"), val = bool(true)]; tensor mean_23_cast_fp16 = reduce_mean(axes = mean_23_axes_0, keep_dims = mean_23_keep_dims_0, x = hidden_states_45_cast_fp16)[name = string("mean_23_cast_fp16")]; tensor input_77_cast_fp16 = sub(x = hidden_states_45_cast_fp16, y = mean_23_cast_fp16)[name = string("input_77_cast_fp16")]; tensor var_1419_axes_0 = const()[name = string("op_1419_axes_0"), val = tensor([-1])]; tensor model_model_layers_13_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_13_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273424064)))]; tensor var_1419_cast_fp16 = layer_norm(axes = var_1419_axes_0, epsilon = var_52_to_fp16, gamma = model_model_layers_13_post_attention_layernorm_weight_to_fp16, x = input_77_cast_fp16)[name = string("op_1419_cast_fp16")]; tensor var_1426 = const()[name = string("op_1426"), val = tensor([0, 2, 1])]; tensor input_79_axes_0 = const()[name = string("input_79_axes_0"), val = tensor([2])]; tensor var_1427 = transpose(perm = var_1426, x = var_1419_cast_fp16)[name = string("transpose_13")]; tensor input_79 = expand_dims(axes = input_79_axes_0, x = var_1427)[name = string("input_79")]; string input_81_pad_type_0 = const()[name = string("input_81_pad_type_0"), val = string("valid")]; tensor input_81_strides_0 = const()[name = string("input_81_strides_0"), val = tensor([1, 1])]; tensor input_81_pad_0 = const()[name = string("input_81_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_81_dilations_0 = const()[name = string("input_81_dilations_0"), val = tensor([1, 1])]; int32 input_81_groups_0 = const()[name = string("input_81_groups_0"), val = int32(1)]; tensor input_81 = conv(dilations = input_81_dilations_0, groups = input_81_groups_0, pad = input_81_pad_0, pad_type = input_81_pad_type_0, strides = input_81_strides_0, weight = model_model_layers_13_mlp_gate_proj_weight_palettized, x = input_79)[name = string("input_81")]; string up_states_11_pad_type_0 = const()[name = string("up_states_11_pad_type_0"), val = string("valid")]; tensor up_states_11_strides_0 = const()[name = string("up_states_11_strides_0"), val = tensor([1, 1])]; tensor up_states_11_pad_0 = const()[name = string("up_states_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_11_dilations_0 = const()[name = string("up_states_11_dilations_0"), val = tensor([1, 1])]; int32 up_states_11_groups_0 = const()[name = string("up_states_11_groups_0"), val = int32(1)]; tensor up_states_11 = conv(dilations = up_states_11_dilations_0, groups = up_states_11_groups_0, pad = up_states_11_pad_0, pad_type = up_states_11_pad_type_0, strides = up_states_11_strides_0, weight = model_model_layers_13_mlp_up_proj_weight_palettized, x = input_79)[name = string("up_states_11")]; tensor gate_states_11 = silu(x = input_81)[name = string("gate_states_11")]; tensor input_83 = mul(x = gate_states_11, y = up_states_11)[name = string("input_83")]; string hidden_states_47_pad_type_0 = const()[name = string("hidden_states_47_pad_type_0"), val = string("valid")]; tensor hidden_states_47_strides_0 = const()[name = string("hidden_states_47_strides_0"), val = tensor([1, 1])]; tensor hidden_states_47_pad_0 = const()[name = string("hidden_states_47_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_47_dilations_0 = const()[name = string("hidden_states_47_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_47_groups_0 = const()[name = string("hidden_states_47_groups_0"), val = int32(1)]; tensor hidden_states_47 = conv(dilations = hidden_states_47_dilations_0, groups = hidden_states_47_groups_0, pad = hidden_states_47_pad_0, pad_type = hidden_states_47_pad_type_0, strides = hidden_states_47_strides_0, weight = model_model_layers_13_mlp_down_proj_weight_palettized, x = input_83)[name = string("hidden_states_47")]; tensor var_1449_axes_0 = const()[name = string("op_1449_axes_0"), val = tensor([2])]; tensor var_1449 = squeeze(axes = var_1449_axes_0, x = hidden_states_47)[name = string("op_1449")]; tensor var_1450 = const()[name = string("op_1450"), val = tensor([0, 2, 1])]; tensor var_1451 = transpose(perm = var_1450, x = var_1449)[name = string("transpose_12")]; tensor hidden_states_49_cast_fp16 = add(x = hidden_states_45_cast_fp16, y = var_1451)[name = string("hidden_states_49_cast_fp16")]; tensor mean_25_axes_0 = const()[name = string("mean_25_axes_0"), val = tensor([-1])]; bool mean_25_keep_dims_0 = const()[name = string("mean_25_keep_dims_0"), val = bool(true)]; tensor mean_25_cast_fp16 = reduce_mean(axes = mean_25_axes_0, keep_dims = mean_25_keep_dims_0, x = hidden_states_49_cast_fp16)[name = string("mean_25_cast_fp16")]; tensor input_85_cast_fp16 = sub(x = hidden_states_49_cast_fp16, y = mean_25_cast_fp16)[name = string("input_85_cast_fp16")]; tensor var_1459_axes_0 = const()[name = string("op_1459_axes_0"), val = tensor([-1])]; tensor model_model_layers_14_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_14_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273428224)))]; tensor var_1459_cast_fp16 = layer_norm(axes = var_1459_axes_0, epsilon = var_52_to_fp16, gamma = model_model_layers_14_input_layernorm_weight_to_fp16, x = input_85_cast_fp16)[name = string("op_1459_cast_fp16")]; tensor var_1463 = const()[name = string("op_1463"), val = tensor([0, 2, 1])]; tensor var_1465_axes_0 = const()[name = string("op_1465_axes_0"), val = tensor([2])]; tensor var_1464 = transpose(perm = var_1463, x = var_1459_cast_fp16)[name = string("transpose_11")]; tensor var_1465 = expand_dims(axes = var_1465_axes_0, x = var_1464)[name = string("op_1465")]; string query_states_25_pad_type_0 = const()[name = string("query_states_25_pad_type_0"), val = string("valid")]; tensor query_states_25_strides_0 = const()[name = string("query_states_25_strides_0"), val = tensor([1, 1])]; tensor query_states_25_pad_0 = const()[name = string("query_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_25_dilations_0 = const()[name = string("query_states_25_dilations_0"), val = tensor([1, 1])]; int32 query_states_25_groups_0 = const()[name = string("query_states_25_groups_0"), val = int32(1)]; tensor query_states_25 = conv(dilations = query_states_25_dilations_0, groups = query_states_25_groups_0, pad = query_states_25_pad_0, pad_type = query_states_25_pad_type_0, strides = query_states_25_strides_0, weight = model_model_layers_14_self_attn_q_proj_weight_palettized, x = var_1465)[name = string("query_states_25")]; string key_states_37_pad_type_0 = const()[name = string("key_states_37_pad_type_0"), val = string("valid")]; tensor key_states_37_strides_0 = const()[name = string("key_states_37_strides_0"), val = tensor([1, 1])]; tensor key_states_37_pad_0 = const()[name = string("key_states_37_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_37_dilations_0 = const()[name = string("key_states_37_dilations_0"), val = tensor([1, 1])]; int32 key_states_37_groups_0 = const()[name = string("key_states_37_groups_0"), val = int32(1)]; tensor key_states_37 = conv(dilations = key_states_37_dilations_0, groups = key_states_37_groups_0, pad = key_states_37_pad_0, pad_type = key_states_37_pad_type_0, strides = key_states_37_strides_0, weight = model_model_layers_14_self_attn_k_proj_weight_palettized, x = var_1465)[name = string("key_states_37")]; string value_states_37_pad_type_0 = const()[name = string("value_states_37_pad_type_0"), val = string("valid")]; tensor value_states_37_strides_0 = const()[name = string("value_states_37_strides_0"), val = tensor([1, 1])]; tensor value_states_37_pad_0 = const()[name = string("value_states_37_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_37_dilations_0 = const()[name = string("value_states_37_dilations_0"), val = tensor([1, 1])]; int32 value_states_37_groups_0 = const()[name = string("value_states_37_groups_0"), val = int32(1)]; tensor value_states_37 = conv(dilations = value_states_37_dilations_0, groups = value_states_37_groups_0, pad = value_states_37_pad_0, pad_type = value_states_37_pad_type_0, strides = value_states_37_strides_0, weight = model_model_layers_14_self_attn_v_proj_weight_palettized, x = var_1465)[name = string("value_states_37")]; tensor var_1485 = const()[name = string("op_1485"), val = tensor([1, 32, 64, 64])]; tensor var_1486 = reshape(shape = var_1485, x = query_states_25)[name = string("op_1486")]; tensor var_1487 = const()[name = string("op_1487"), val = tensor([0, 1, 3, 2])]; tensor var_1489 = const()[name = string("op_1489"), val = tensor([1, 8, 64, 64])]; tensor var_1490 = reshape(shape = var_1489, x = key_states_37)[name = string("op_1490")]; tensor var_1491 = const()[name = string("op_1491"), val = tensor([0, 1, 3, 2])]; tensor var_1493 = const()[name = string("op_1493"), val = tensor([1, 8, 64, 64])]; tensor var_1494 = reshape(shape = var_1493, x = value_states_37)[name = string("op_1494")]; tensor var_1495 = const()[name = string("op_1495"), val = tensor([0, 1, 3, 2])]; tensor x1_25_begin_0 = const()[name = string("x1_25_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_25_end_0 = const()[name = string("x1_25_end_0"), val = tensor([1, 32, 64, 32])]; tensor x1_25_end_mask_0 = const()[name = string("x1_25_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_169 = transpose(perm = var_1487, x = var_1486)[name = string("transpose_10")]; tensor x1_25 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = x_169)[name = string("x1_25")]; tensor x2_25_begin_0 = const()[name = string("x2_25_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_25_end_0 = const()[name = string("x2_25_end_0"), val = tensor([1, 32, 64, 64])]; tensor x2_25_end_mask_0 = const()[name = string("x2_25_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_25 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = x_169)[name = string("x2_25")]; tensor var_1513 = mul(x = x1_25, y = cos_7)[name = string("op_1513")]; tensor var_1514 = mul(x = x2_25, y = sin_7)[name = string("op_1514")]; tensor var_1515 = sub(x = var_1513, y = var_1514)[name = string("op_1515")]; tensor var_1516 = mul(x = x2_25, y = cos_7)[name = string("op_1516")]; tensor var_1517 = mul(x = x1_25, y = sin_7)[name = string("op_1517")]; tensor var_1518 = add(x = var_1516, y = var_1517)[name = string("op_1518")]; bool rotated_25_interleave_0 = const()[name = string("rotated_25_interleave_0"), val = bool(false)]; tensor rotated_25 = concat(axis = var_50, interleave = rotated_25_interleave_0, values = (var_1515, var_1518))[name = string("rotated_25")]; tensor x1_27_begin_0 = const()[name = string("x1_27_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_27_end_0 = const()[name = string("x1_27_end_0"), val = tensor([1, 8, 64, 32])]; tensor x1_27_end_mask_0 = const()[name = string("x1_27_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_173 = transpose(perm = var_1491, x = var_1490)[name = string("transpose_9")]; tensor x1_27 = slice_by_index(begin = x1_27_begin_0, end = x1_27_end_0, end_mask = x1_27_end_mask_0, x = x_173)[name = string("x1_27")]; tensor x2_27_begin_0 = const()[name = string("x2_27_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_27_end_0 = const()[name = string("x2_27_end_0"), val = tensor([1, 8, 64, 64])]; tensor x2_27_end_mask_0 = const()[name = string("x2_27_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_27 = slice_by_index(begin = x2_27_begin_0, end = x2_27_end_0, end_mask = x2_27_end_mask_0, x = x_173)[name = string("x2_27")]; tensor var_1534 = mul(x = x1_27, y = cos_7)[name = string("op_1534")]; tensor var_1535 = mul(x = x2_27, y = sin_7)[name = string("op_1535")]; tensor var_1536 = sub(x = var_1534, y = var_1535)[name = string("op_1536")]; tensor var_1537 = mul(x = x2_27, y = cos_7)[name = string("op_1537")]; tensor var_1538 = mul(x = x1_27, y = sin_7)[name = string("op_1538")]; tensor var_1539 = add(x = var_1537, y = var_1538)[name = string("op_1539")]; bool rotated_27_interleave_0 = const()[name = string("rotated_27_interleave_0"), val = bool(false)]; tensor rotated_27 = concat(axis = var_50, interleave = rotated_27_interleave_0, values = (var_1536, var_1539))[name = string("rotated_27")]; tensor expand_dims_72 = const()[name = string("expand_dims_72"), val = tensor([14])]; tensor expand_dims_73 = const()[name = string("expand_dims_73"), val = tensor([0])]; tensor expand_dims_75 = const()[name = string("expand_dims_75"), val = tensor([0])]; tensor expand_dims_76 = const()[name = string("expand_dims_76"), val = tensor([15])]; int32 concat_110_axis_0 = const()[name = string("concat_110_axis_0"), val = int32(0)]; bool concat_110_interleave_0 = const()[name = string("concat_110_interleave_0"), val = bool(false)]; tensor concat_110 = concat(axis = concat_110_axis_0, interleave = concat_110_interleave_0, values = (expand_dims_72, expand_dims_73, current_pos, expand_dims_75))[name = string("concat_110")]; tensor concat_111_values1_0 = const()[name = string("concat_111_values1_0"), val = tensor([0])]; tensor concat_111_values3_0 = const()[name = string("concat_111_values3_0"), val = tensor([0])]; int32 concat_111_axis_0 = const()[name = string("concat_111_axis_0"), val = int32(0)]; bool concat_111_interleave_0 = const()[name = string("concat_111_interleave_0"), val = bool(false)]; tensor concat_111 = concat(axis = concat_111_axis_0, interleave = concat_111_interleave_0, values = (expand_dims_76, concat_111_values1_0, var_366, concat_111_values3_0))[name = string("concat_111")]; tensor model_model_kv_cache_0_internal_tensor_assign_13_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_110, begin_mask = model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0, end = concat_111, end_mask = model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_13_stride_0, update = rotated_27, x = coreml_update_state_27)[name = string("model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_28_write_state")]; tensor coreml_update_state_28 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_28")]; tensor expand_dims_78 = const()[name = string("expand_dims_78"), val = tensor([30])]; tensor expand_dims_79 = const()[name = string("expand_dims_79"), val = tensor([0])]; tensor expand_dims_81 = const()[name = string("expand_dims_81"), val = tensor([0])]; tensor expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor([31])]; int32 concat_114_axis_0 = const()[name = string("concat_114_axis_0"), val = int32(0)]; bool concat_114_interleave_0 = const()[name = string("concat_114_interleave_0"), val = bool(false)]; tensor concat_114 = concat(axis = concat_114_axis_0, interleave = concat_114_interleave_0, values = (expand_dims_78, expand_dims_79, current_pos, expand_dims_81))[name = string("concat_114")]; tensor concat_115_values1_0 = const()[name = string("concat_115_values1_0"), val = tensor([0])]; tensor concat_115_values3_0 = const()[name = string("concat_115_values3_0"), val = tensor([0])]; int32 concat_115_axis_0 = const()[name = string("concat_115_axis_0"), val = int32(0)]; bool concat_115_interleave_0 = const()[name = string("concat_115_interleave_0"), val = bool(false)]; tensor concat_115 = concat(axis = concat_115_axis_0, interleave = concat_115_interleave_0, values = (expand_dims_82, concat_115_values1_0, var_366, concat_115_values3_0))[name = string("concat_115")]; tensor model_model_kv_cache_0_internal_tensor_assign_14_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_39 = transpose(perm = var_1495, x = var_1494)[name = string("transpose_8")]; tensor model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_114, begin_mask = model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0, end = concat_115, end_mask = model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_14_stride_0, update = value_states_39, x = coreml_update_state_28)[name = string("model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_29_write_state")]; tensor coreml_update_state_29 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_29")]; tensor var_1562_begin_0 = const()[name = string("op_1562_begin_0"), val = tensor([14, 0, 0, 0])]; tensor var_1562_end_0 = const()[name = string("op_1562_end_0"), val = tensor([15, 8, 1546, 64])]; tensor var_1562_end_mask_0 = const()[name = string("op_1562_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1562_cast_fp16 = slice_by_index(begin = var_1562_begin_0, end = var_1562_end_0, end_mask = var_1562_end_mask_0, x = coreml_update_state_29)[name = string("op_1562_cast_fp16")]; tensor K_layer_cache_13_axes_0 = const()[name = string("K_layer_cache_13_axes_0"), val = tensor([0])]; tensor K_layer_cache_13_cast_fp16 = squeeze(axes = K_layer_cache_13_axes_0, x = var_1562_cast_fp16)[name = string("K_layer_cache_13_cast_fp16")]; tensor var_1564_begin_0 = const()[name = string("op_1564_begin_0"), val = tensor([30, 0, 0, 0])]; tensor var_1564_end_0 = const()[name = string("op_1564_end_0"), val = tensor([31, 8, 1546, 64])]; tensor var_1564_end_mask_0 = const()[name = string("op_1564_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1564_cast_fp16 = slice_by_index(begin = var_1564_begin_0, end = var_1564_end_0, end_mask = var_1564_end_mask_0, x = coreml_update_state_29)[name = string("op_1564_cast_fp16")]; tensor V_layer_cache_13_axes_0 = const()[name = string("V_layer_cache_13_axes_0"), val = tensor([0])]; tensor V_layer_cache_13_cast_fp16 = squeeze(axes = V_layer_cache_13_axes_0, x = var_1564_cast_fp16)[name = string("V_layer_cache_13_cast_fp16")]; tensor x_179_axes_0 = const()[name = string("x_179_axes_0"), val = tensor([1])]; tensor x_179_cast_fp16 = expand_dims(axes = x_179_axes_0, x = K_layer_cache_13_cast_fp16)[name = string("x_179_cast_fp16")]; tensor var_1573 = const()[name = string("op_1573"), val = tensor([1, 4, 1, 1])]; tensor x_181_cast_fp16 = tile(reps = var_1573, x = x_179_cast_fp16)[name = string("x_181_cast_fp16")]; tensor var_1577 = const()[name = string("op_1577"), val = tensor([1, -1, 1546, 64])]; tensor var_1578_cast_fp16 = reshape(shape = var_1577, x = x_181_cast_fp16)[name = string("op_1578_cast_fp16")]; tensor x_185_axes_0 = const()[name = string("x_185_axes_0"), val = tensor([1])]; tensor x_185_cast_fp16 = expand_dims(axes = x_185_axes_0, x = V_layer_cache_13_cast_fp16)[name = string("x_185_cast_fp16")]; tensor var_1580 = const()[name = string("op_1580"), val = tensor([1, 4, 1, 1])]; tensor x_187_cast_fp16 = tile(reps = var_1580, x = x_185_cast_fp16)[name = string("x_187_cast_fp16")]; bool var_1587_transpose_x_0 = const()[name = string("op_1587_transpose_x_0"), val = bool(false)]; bool var_1587_transpose_y_0 = const()[name = string("op_1587_transpose_y_0"), val = bool(true)]; tensor var_1587_cast_fp16 = matmul(transpose_x = var_1587_transpose_x_0, transpose_y = var_1587_transpose_y_0, x = rotated_25, y = var_1578_cast_fp16)[name = string("op_1587_cast_fp16")]; fp16 var_1588_to_fp16 = const()[name = string("op_1588_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_13_cast_fp16 = mul(x = var_1587_cast_fp16, y = var_1588_to_fp16)[name = string("attn_weights_13_cast_fp16")]; tensor x_189_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = causal_mask)[name = string("x_189_cast_fp16")]; tensor reduce_max_6_axes_0 = const()[name = string("reduce_max_6_axes_0"), val = tensor([-1])]; bool reduce_max_6_keep_dims_0 = const()[name = string("reduce_max_6_keep_dims_0"), val = bool(true)]; tensor reduce_max_6_cast_fp16 = reduce_max(axes = reduce_max_6_axes_0, keep_dims = reduce_max_6_keep_dims_0, x = x_189_cast_fp16)[name = string("reduce_max_6_cast_fp16")]; tensor x_191_cast_fp16 = sub(x = x_189_cast_fp16, y = reduce_max_6_cast_fp16)[name = string("x_191_cast_fp16")]; tensor exp_x_13_cast_fp16 = exp(x = x_191_cast_fp16)[name = string("exp_x_13_cast_fp16")]; tensor var_1599_axes_0 = const()[name = string("op_1599_axes_0"), val = tensor([-1])]; bool var_1599_keep_dims_0 = const()[name = string("op_1599_keep_dims_0"), val = bool(true)]; tensor var_1599_cast_fp16 = reduce_sum(axes = var_1599_axes_0, keep_dims = var_1599_keep_dims_0, x = exp_x_13_cast_fp16)[name = string("op_1599_cast_fp16")]; tensor var_1600_cast_fp16 = real_div(x = exp_x_13_cast_fp16, y = var_1599_cast_fp16)[name = string("op_1600_cast_fp16")]; tensor concat_120 = const()[name = string("concat_120"), val = tensor([32, 64, 1546])]; tensor reshape_18_cast_fp16 = reshape(shape = concat_120, x = var_1600_cast_fp16)[name = string("reshape_18_cast_fp16")]; tensor concat_121 = const()[name = string("concat_121"), val = tensor([32, 1546, 64])]; tensor reshape_19_cast_fp16 = reshape(shape = concat_121, x = x_187_cast_fp16)[name = string("reshape_19_cast_fp16")]; bool matmul_6_transpose_x_0 = const()[name = string("matmul_6_transpose_x_0"), val = bool(false)]; bool matmul_6_transpose_y_0 = const()[name = string("matmul_6_transpose_y_0"), val = bool(false)]; tensor matmul_6_cast_fp16 = matmul(transpose_x = matmul_6_transpose_x_0, transpose_y = matmul_6_transpose_y_0, x = reshape_18_cast_fp16, y = reshape_19_cast_fp16)[name = string("matmul_6_cast_fp16")]; tensor concat_125 = const()[name = string("concat_125"), val = tensor([1, 32, 64, 64])]; tensor reshape_20_cast_fp16 = reshape(shape = concat_125, x = matmul_6_cast_fp16)[name = string("reshape_20_cast_fp16")]; tensor var_1603_perm_0 = const()[name = string("op_1603_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1605 = const()[name = string("op_1605"), val = tensor([1, 64, 2048])]; tensor var_1603_cast_fp16 = transpose(perm = var_1603_perm_0, x = reshape_20_cast_fp16)[name = string("transpose_7")]; tensor input_89_cast_fp16 = reshape(shape = var_1605, x = var_1603_cast_fp16)[name = string("input_89_cast_fp16")]; tensor model_model_layers_14_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273432384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275529600))))[name = string("model_model_layers_14_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_6_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_14_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_89_cast_fp16)[name = string("linear_6_cast_fp16")]; tensor hidden_states_53_cast_fp16 = add(x = hidden_states_49_cast_fp16, y = linear_6_cast_fp16)[name = string("hidden_states_53_cast_fp16")]; tensor mean_27_axes_0 = const()[name = string("mean_27_axes_0"), val = tensor([-1])]; bool mean_27_keep_dims_0 = const()[name = string("mean_27_keep_dims_0"), val = bool(true)]; tensor mean_27_cast_fp16 = reduce_mean(axes = mean_27_axes_0, keep_dims = mean_27_keep_dims_0, x = hidden_states_53_cast_fp16)[name = string("mean_27_cast_fp16")]; tensor input_91_cast_fp16 = sub(x = hidden_states_53_cast_fp16, y = mean_27_cast_fp16)[name = string("input_91_cast_fp16")]; tensor var_1616_axes_0 = const()[name = string("op_1616_axes_0"), val = tensor([-1])]; tensor model_model_layers_14_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_14_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275537856)))]; tensor var_1616_cast_fp16 = layer_norm(axes = var_1616_axes_0, epsilon = var_52_to_fp16, gamma = model_model_layers_14_post_attention_layernorm_weight_to_fp16, x = input_91_cast_fp16)[name = string("op_1616_cast_fp16")]; tensor var_1623 = const()[name = string("op_1623"), val = tensor([0, 2, 1])]; tensor input_93_axes_0 = const()[name = string("input_93_axes_0"), val = tensor([2])]; tensor var_1624 = transpose(perm = var_1623, x = var_1616_cast_fp16)[name = string("transpose_6")]; tensor input_93 = expand_dims(axes = input_93_axes_0, x = var_1624)[name = string("input_93")]; string input_95_pad_type_0 = const()[name = string("input_95_pad_type_0"), val = string("valid")]; tensor input_95_strides_0 = const()[name = string("input_95_strides_0"), val = tensor([1, 1])]; tensor input_95_pad_0 = const()[name = string("input_95_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_95_dilations_0 = const()[name = string("input_95_dilations_0"), val = tensor([1, 1])]; int32 input_95_groups_0 = const()[name = string("input_95_groups_0"), val = int32(1)]; tensor input_95 = conv(dilations = input_95_dilations_0, groups = input_95_groups_0, pad = input_95_pad_0, pad_type = input_95_pad_type_0, strides = input_95_strides_0, weight = model_model_layers_14_mlp_gate_proj_weight_palettized, x = input_93)[name = string("input_95")]; string up_states_pad_type_0 = const()[name = string("up_states_pad_type_0"), val = string("valid")]; tensor up_states_strides_0 = const()[name = string("up_states_strides_0"), val = tensor([1, 1])]; tensor up_states_pad_0 = const()[name = string("up_states_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_dilations_0 = const()[name = string("up_states_dilations_0"), val = tensor([1, 1])]; int32 up_states_groups_0 = const()[name = string("up_states_groups_0"), val = int32(1)]; tensor up_states = conv(dilations = up_states_dilations_0, groups = up_states_groups_0, pad = up_states_pad_0, pad_type = up_states_pad_type_0, strides = up_states_strides_0, weight = model_model_layers_14_mlp_up_proj_weight_palettized, x = input_93)[name = string("up_states")]; tensor gate_states = silu(x = input_95)[name = string("gate_states")]; tensor input_97 = mul(x = gate_states, y = up_states)[name = string("input_97")]; string hidden_states_55_pad_type_0 = const()[name = string("hidden_states_55_pad_type_0"), val = string("valid")]; tensor hidden_states_55_strides_0 = const()[name = string("hidden_states_55_strides_0"), val = tensor([1, 1])]; tensor hidden_states_55_pad_0 = const()[name = string("hidden_states_55_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_55_dilations_0 = const()[name = string("hidden_states_55_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_55_groups_0 = const()[name = string("hidden_states_55_groups_0"), val = int32(1)]; tensor hidden_states_55 = conv(dilations = hidden_states_55_dilations_0, groups = hidden_states_55_groups_0, pad = hidden_states_55_pad_0, pad_type = hidden_states_55_pad_type_0, strides = hidden_states_55_strides_0, weight = model_model_layers_14_mlp_down_proj_weight_palettized, x = input_97)[name = string("hidden_states_55")]; tensor var_1646_axes_0 = const()[name = string("op_1646_axes_0"), val = tensor([2])]; tensor var_1646 = squeeze(axes = var_1646_axes_0, x = hidden_states_55)[name = string("op_1646")]; tensor var_1647 = const()[name = string("op_1647"), val = tensor([0, 2, 1])]; tensor var_1648 = transpose(perm = var_1647, x = var_1646)[name = string("transpose_5")]; tensor hidden_states_57_cast_fp16 = add(x = hidden_states_53_cast_fp16, y = var_1648)[name = string("hidden_states_57_cast_fp16")]; tensor mean_axes_0 = const()[name = string("mean_axes_0"), val = tensor([-1])]; bool mean_keep_dims_0 = const()[name = string("mean_keep_dims_0"), val = bool(true)]; tensor mean_cast_fp16 = reduce_mean(axes = mean_axes_0, keep_dims = mean_keep_dims_0, x = hidden_states_57_cast_fp16)[name = string("mean_cast_fp16")]; tensor input_99_cast_fp16 = sub(x = hidden_states_57_cast_fp16, y = mean_cast_fp16)[name = string("input_99_cast_fp16")]; tensor var_1656_axes_0 = const()[name = string("op_1656_axes_0"), val = tensor([-1])]; tensor model_model_layers_15_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_15_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275542016)))]; tensor var_1656_cast_fp16 = layer_norm(axes = var_1656_axes_0, epsilon = var_52_to_fp16, gamma = model_model_layers_15_input_layernorm_weight_to_fp16, x = input_99_cast_fp16)[name = string("op_1656_cast_fp16")]; tensor var_1660 = const()[name = string("op_1660"), val = tensor([0, 2, 1])]; tensor var_1662_axes_0 = const()[name = string("op_1662_axes_0"), val = tensor([2])]; tensor var_1661 = transpose(perm = var_1660, x = var_1656_cast_fp16)[name = string("transpose_4")]; tensor var_1662 = expand_dims(axes = var_1662_axes_0, x = var_1661)[name = string("op_1662")]; string query_states_29_pad_type_0 = const()[name = string("query_states_29_pad_type_0"), val = string("valid")]; tensor query_states_29_strides_0 = const()[name = string("query_states_29_strides_0"), val = tensor([1, 1])]; tensor query_states_29_pad_0 = const()[name = string("query_states_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_29_dilations_0 = const()[name = string("query_states_29_dilations_0"), val = tensor([1, 1])]; int32 query_states_29_groups_0 = const()[name = string("query_states_29_groups_0"), val = int32(1)]; tensor query_states_29 = conv(dilations = query_states_29_dilations_0, groups = query_states_29_groups_0, pad = query_states_29_pad_0, pad_type = query_states_29_pad_type_0, strides = query_states_29_strides_0, weight = model_model_layers_15_self_attn_q_proj_weight_palettized, x = var_1662)[name = string("query_states_29")]; string key_states_43_pad_type_0 = const()[name = string("key_states_43_pad_type_0"), val = string("valid")]; tensor key_states_43_strides_0 = const()[name = string("key_states_43_strides_0"), val = tensor([1, 1])]; tensor key_states_43_pad_0 = const()[name = string("key_states_43_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_43_dilations_0 = const()[name = string("key_states_43_dilations_0"), val = tensor([1, 1])]; int32 key_states_43_groups_0 = const()[name = string("key_states_43_groups_0"), val = int32(1)]; tensor key_states_43 = conv(dilations = key_states_43_dilations_0, groups = key_states_43_groups_0, pad = key_states_43_pad_0, pad_type = key_states_43_pad_type_0, strides = key_states_43_strides_0, weight = model_model_layers_15_self_attn_k_proj_weight_palettized, x = var_1662)[name = string("key_states_43")]; string value_states_43_pad_type_0 = const()[name = string("value_states_43_pad_type_0"), val = string("valid")]; tensor value_states_43_strides_0 = const()[name = string("value_states_43_strides_0"), val = tensor([1, 1])]; tensor value_states_43_pad_0 = const()[name = string("value_states_43_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_43_dilations_0 = const()[name = string("value_states_43_dilations_0"), val = tensor([1, 1])]; int32 value_states_43_groups_0 = const()[name = string("value_states_43_groups_0"), val = int32(1)]; tensor value_states_43 = conv(dilations = value_states_43_dilations_0, groups = value_states_43_groups_0, pad = value_states_43_pad_0, pad_type = value_states_43_pad_type_0, strides = value_states_43_strides_0, weight = model_model_layers_15_self_attn_v_proj_weight_palettized, x = var_1662)[name = string("value_states_43")]; tensor var_1682 = const()[name = string("op_1682"), val = tensor([1, 32, 64, 64])]; tensor var_1683 = reshape(shape = var_1682, x = query_states_29)[name = string("op_1683")]; tensor var_1684 = const()[name = string("op_1684"), val = tensor([0, 1, 3, 2])]; tensor var_1686 = const()[name = string("op_1686"), val = tensor([1, 8, 64, 64])]; tensor var_1687 = reshape(shape = var_1686, x = key_states_43)[name = string("op_1687")]; tensor var_1688 = const()[name = string("op_1688"), val = tensor([0, 1, 3, 2])]; tensor var_1690 = const()[name = string("op_1690"), val = tensor([1, 8, 64, 64])]; tensor var_1691 = reshape(shape = var_1690, x = value_states_43)[name = string("op_1691")]; tensor var_1692 = const()[name = string("op_1692"), val = tensor([0, 1, 3, 2])]; tensor x1_29_begin_0 = const()[name = string("x1_29_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_29_end_0 = const()[name = string("x1_29_end_0"), val = tensor([1, 32, 64, 32])]; tensor x1_29_end_mask_0 = const()[name = string("x1_29_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_197 = transpose(perm = var_1684, x = var_1683)[name = string("transpose_3")]; tensor x1_29 = slice_by_index(begin = x1_29_begin_0, end = x1_29_end_0, end_mask = x1_29_end_mask_0, x = x_197)[name = string("x1_29")]; tensor x2_29_begin_0 = const()[name = string("x2_29_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_29_end_0 = const()[name = string("x2_29_end_0"), val = tensor([1, 32, 64, 64])]; tensor x2_29_end_mask_0 = const()[name = string("x2_29_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_29 = slice_by_index(begin = x2_29_begin_0, end = x2_29_end_0, end_mask = x2_29_end_mask_0, x = x_197)[name = string("x2_29")]; tensor var_1710 = mul(x = x1_29, y = cos_7)[name = string("op_1710")]; tensor var_1711 = mul(x = x2_29, y = sin_7)[name = string("op_1711")]; tensor var_1712 = sub(x = var_1710, y = var_1711)[name = string("op_1712")]; tensor var_1713 = mul(x = x2_29, y = cos_7)[name = string("op_1713")]; tensor var_1714 = mul(x = x1_29, y = sin_7)[name = string("op_1714")]; tensor var_1715 = add(x = var_1713, y = var_1714)[name = string("op_1715")]; bool rotated_29_interleave_0 = const()[name = string("rotated_29_interleave_0"), val = bool(false)]; tensor rotated_29 = concat(axis = var_50, interleave = rotated_29_interleave_0, values = (var_1712, var_1715))[name = string("rotated_29")]; tensor x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_end_0 = const()[name = string("x1_end_0"), val = tensor([1, 8, 64, 32])]; tensor x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_201 = transpose(perm = var_1688, x = var_1687)[name = string("transpose_2")]; tensor x1 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = x_201)[name = string("x1")]; tensor x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_end_0 = const()[name = string("x2_end_0"), val = tensor([1, 8, 64, 64])]; tensor x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = x_201)[name = string("x2")]; tensor var_1731 = mul(x = x1, y = cos_7)[name = string("op_1731")]; tensor var_1732 = mul(x = x2, y = sin_7)[name = string("op_1732")]; tensor var_1733 = sub(x = var_1731, y = var_1732)[name = string("op_1733")]; tensor var_1734 = mul(x = x2, y = cos_7)[name = string("op_1734")]; tensor var_1735 = mul(x = x1, y = sin_7)[name = string("op_1735")]; tensor var_1736 = add(x = var_1734, y = var_1735)[name = string("op_1736")]; bool rotated_interleave_0 = const()[name = string("rotated_interleave_0"), val = bool(false)]; tensor rotated = concat(axis = var_50, interleave = rotated_interleave_0, values = (var_1733, var_1736))[name = string("rotated")]; tensor expand_dims_84 = const()[name = string("expand_dims_84"), val = tensor([15])]; tensor expand_dims_85 = const()[name = string("expand_dims_85"), val = tensor([0])]; tensor expand_dims_87 = const()[name = string("expand_dims_87"), val = tensor([0])]; tensor expand_dims_88 = const()[name = string("expand_dims_88"), val = tensor([16])]; int32 concat_128_axis_0 = const()[name = string("concat_128_axis_0"), val = int32(0)]; bool concat_128_interleave_0 = const()[name = string("concat_128_interleave_0"), val = bool(false)]; tensor concat_128 = concat(axis = concat_128_axis_0, interleave = concat_128_interleave_0, values = (expand_dims_84, expand_dims_85, current_pos, expand_dims_87))[name = string("concat_128")]; tensor concat_129_values1_0 = const()[name = string("concat_129_values1_0"), val = tensor([0])]; tensor concat_129_values3_0 = const()[name = string("concat_129_values3_0"), val = tensor([0])]; int32 concat_129_axis_0 = const()[name = string("concat_129_axis_0"), val = int32(0)]; bool concat_129_interleave_0 = const()[name = string("concat_129_interleave_0"), val = bool(false)]; tensor concat_129 = concat(axis = concat_129_axis_0, interleave = concat_129_interleave_0, values = (expand_dims_88, concat_129_values1_0, var_366, concat_129_values3_0))[name = string("concat_129")]; tensor model_model_kv_cache_0_internal_tensor_assign_15_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_128, begin_mask = model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0, end = concat_129, end_mask = model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_15_stride_0, update = rotated, x = coreml_update_state_29)[name = string("model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_30_write_state")]; tensor coreml_update_state_30 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_30")]; tensor expand_dims_90 = const()[name = string("expand_dims_90"), val = tensor([31])]; tensor expand_dims_91 = const()[name = string("expand_dims_91"), val = tensor([0])]; tensor expand_dims_93 = const()[name = string("expand_dims_93"), val = tensor([0])]; tensor expand_dims_94 = const()[name = string("expand_dims_94"), val = tensor([32])]; int32 concat_132_axis_0 = const()[name = string("concat_132_axis_0"), val = int32(0)]; bool concat_132_interleave_0 = const()[name = string("concat_132_interleave_0"), val = bool(false)]; tensor concat_132 = concat(axis = concat_132_axis_0, interleave = concat_132_interleave_0, values = (expand_dims_90, expand_dims_91, current_pos, expand_dims_93))[name = string("concat_132")]; tensor concat_133_values1_0 = const()[name = string("concat_133_values1_0"), val = tensor([0])]; tensor concat_133_values3_0 = const()[name = string("concat_133_values3_0"), val = tensor([0])]; int32 concat_133_axis_0 = const()[name = string("concat_133_axis_0"), val = int32(0)]; bool concat_133_interleave_0 = const()[name = string("concat_133_interleave_0"), val = bool(false)]; tensor concat_133 = concat(axis = concat_133_axis_0, interleave = concat_133_interleave_0, values = (expand_dims_94, concat_133_values1_0, var_366, concat_133_values3_0))[name = string("concat_133")]; tensor model_model_kv_cache_0_internal_tensor_assign_16_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_45 = transpose(perm = var_1692, x = var_1691)[name = string("transpose_1")]; tensor model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_132, begin_mask = model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0, end = concat_133, end_mask = model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_16_stride_0, update = value_states_45, x = coreml_update_state_30)[name = string("model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_31_write_state")]; tensor coreml_update_state_31 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_31")]; tensor var_1759_begin_0 = const()[name = string("op_1759_begin_0"), val = tensor([15, 0, 0, 0])]; tensor var_1759_end_0 = const()[name = string("op_1759_end_0"), val = tensor([16, 8, 1546, 64])]; tensor var_1759_end_mask_0 = const()[name = string("op_1759_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1759_cast_fp16 = slice_by_index(begin = var_1759_begin_0, end = var_1759_end_0, end_mask = var_1759_end_mask_0, x = coreml_update_state_31)[name = string("op_1759_cast_fp16")]; tensor K_layer_cache_axes_0 = const()[name = string("K_layer_cache_axes_0"), val = tensor([0])]; tensor K_layer_cache_cast_fp16 = squeeze(axes = K_layer_cache_axes_0, x = var_1759_cast_fp16)[name = string("K_layer_cache_cast_fp16")]; tensor var_1761_begin_0 = const()[name = string("op_1761_begin_0"), val = tensor([31, 0, 0, 0])]; tensor var_1761_end_0 = const()[name = string("op_1761_end_0"), val = tensor([1, 8, 1546, 64])]; tensor var_1761_end_mask_0 = const()[name = string("op_1761_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1761_cast_fp16 = slice_by_index(begin = var_1761_begin_0, end = var_1761_end_0, end_mask = var_1761_end_mask_0, x = coreml_update_state_31)[name = string("op_1761_cast_fp16")]; tensor V_layer_cache_axes_0 = const()[name = string("V_layer_cache_axes_0"), val = tensor([0])]; tensor V_layer_cache_cast_fp16 = squeeze(axes = V_layer_cache_axes_0, x = var_1761_cast_fp16)[name = string("V_layer_cache_cast_fp16")]; tensor x_207_axes_0 = const()[name = string("x_207_axes_0"), val = tensor([1])]; tensor x_207_cast_fp16 = expand_dims(axes = x_207_axes_0, x = K_layer_cache_cast_fp16)[name = string("x_207_cast_fp16")]; tensor var_1770 = const()[name = string("op_1770"), val = tensor([1, 4, 1, 1])]; tensor x_209_cast_fp16 = tile(reps = var_1770, x = x_207_cast_fp16)[name = string("x_209_cast_fp16")]; tensor var_1774 = const()[name = string("op_1774"), val = tensor([1, -1, 1546, 64])]; tensor var_1775_cast_fp16 = reshape(shape = var_1774, x = x_209_cast_fp16)[name = string("op_1775_cast_fp16")]; tensor x_213_axes_0 = const()[name = string("x_213_axes_0"), val = tensor([1])]; tensor x_213_cast_fp16 = expand_dims(axes = x_213_axes_0, x = V_layer_cache_cast_fp16)[name = string("x_213_cast_fp16")]; tensor var_1777 = const()[name = string("op_1777"), val = tensor([1, 4, 1, 1])]; tensor x_215_cast_fp16 = tile(reps = var_1777, x = x_213_cast_fp16)[name = string("x_215_cast_fp16")]; bool var_1784_transpose_x_0 = const()[name = string("op_1784_transpose_x_0"), val = bool(false)]; bool var_1784_transpose_y_0 = const()[name = string("op_1784_transpose_y_0"), val = bool(true)]; tensor var_1784_cast_fp16 = matmul(transpose_x = var_1784_transpose_x_0, transpose_y = var_1784_transpose_y_0, x = rotated_29, y = var_1775_cast_fp16)[name = string("op_1784_cast_fp16")]; fp16 var_1785_to_fp16 = const()[name = string("op_1785_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_cast_fp16 = mul(x = var_1784_cast_fp16, y = var_1785_to_fp16)[name = string("attn_weights_cast_fp16")]; tensor x_217_cast_fp16 = add(x = attn_weights_cast_fp16, y = causal_mask)[name = string("x_217_cast_fp16")]; tensor reduce_max_7_axes_0 = const()[name = string("reduce_max_7_axes_0"), val = tensor([-1])]; bool reduce_max_7_keep_dims_0 = const()[name = string("reduce_max_7_keep_dims_0"), val = bool(true)]; tensor reduce_max_7_cast_fp16 = reduce_max(axes = reduce_max_7_axes_0, keep_dims = reduce_max_7_keep_dims_0, x = x_217_cast_fp16)[name = string("reduce_max_7_cast_fp16")]; tensor x_cast_fp16 = sub(x = x_217_cast_fp16, y = reduce_max_7_cast_fp16)[name = string("x_cast_fp16")]; tensor exp_x_cast_fp16 = exp(x = x_cast_fp16)[name = string("exp_x_cast_fp16")]; tensor var_1796_axes_0 = const()[name = string("op_1796_axes_0"), val = tensor([-1])]; bool var_1796_keep_dims_0 = const()[name = string("op_1796_keep_dims_0"), val = bool(true)]; tensor var_1796_cast_fp16 = reduce_sum(axes = var_1796_axes_0, keep_dims = var_1796_keep_dims_0, x = exp_x_cast_fp16)[name = string("op_1796_cast_fp16")]; tensor var_1797_cast_fp16 = real_div(x = exp_x_cast_fp16, y = var_1796_cast_fp16)[name = string("op_1797_cast_fp16")]; tensor concat_138 = const()[name = string("concat_138"), val = tensor([32, 64, 1546])]; tensor reshape_21_cast_fp16 = reshape(shape = concat_138, x = var_1797_cast_fp16)[name = string("reshape_21_cast_fp16")]; tensor concat_139 = const()[name = string("concat_139"), val = tensor([32, 1546, 64])]; tensor reshape_22_cast_fp16 = reshape(shape = concat_139, x = x_215_cast_fp16)[name = string("reshape_22_cast_fp16")]; bool matmul_7_transpose_x_0 = const()[name = string("matmul_7_transpose_x_0"), val = bool(false)]; bool matmul_7_transpose_y_0 = const()[name = string("matmul_7_transpose_y_0"), val = bool(false)]; tensor matmul_7_cast_fp16 = matmul(transpose_x = matmul_7_transpose_x_0, transpose_y = matmul_7_transpose_y_0, x = reshape_21_cast_fp16, y = reshape_22_cast_fp16)[name = string("matmul_7_cast_fp16")]; tensor concat_143 = const()[name = string("concat_143"), val = tensor([1, 32, 64, 64])]; tensor reshape_23_cast_fp16 = reshape(shape = concat_143, x = matmul_7_cast_fp16)[name = string("reshape_23_cast_fp16")]; tensor var_1800_perm_0 = const()[name = string("op_1800_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1802 = const()[name = string("op_1802"), val = tensor([1, 64, 2048])]; tensor var_1800_cast_fp16 = transpose(perm = var_1800_perm_0, x = reshape_23_cast_fp16)[name = string("transpose_0")]; tensor input_cast_fp16 = reshape(shape = var_1802, x = var_1800_cast_fp16)[name = string("input_cast_fp16")]; tensor model_model_layers_15_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275546176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(277643392))))[name = string("model_model_layers_15_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_7_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_15_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_cast_fp16)[name = string("linear_7_cast_fp16")]; tensor hidden_states_cast_fp16 = add(x = hidden_states_57_cast_fp16, y = linear_7_cast_fp16)[name = string("hidden_states_cast_fp16")]; tensor var_1808_begin_0 = const()[name = string("op_1808_begin_0"), val = tensor([0, 0, 0])]; tensor var_1808_end_0 = const()[name = string("op_1808_end_0"), val = tensor([1, 1, 2048])]; tensor var_1808_end_mask_0 = const()[name = string("op_1808_end_mask_0"), val = tensor([true, false, true])]; tensor output_hidden_states = slice_by_index(begin = var_1808_begin_0, end = var_1808_end_0, end_mask = var_1808_end_mask_0, x = hidden_states_cast_fp16)[name = string("op_1808_cast_fp16")]; } -> (output_hidden_states); }