program(1.3) [buildInfo = dict({{"coremlc-component-MIL", "3402.3.2"}, {"coremlc-version", "3402.4.1"}})] { func infer(tensor causal_mask, tensor current_pos, tensor hidden_states, state> model_model_kv_cache_0, tensor position_ids) { tensor model_model_layers_0_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2097280))))[name = string("model_model_layers_0_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_0_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2105536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2629888))))[name = string("model_model_layers_0_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_0_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2632000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3156352))))[name = string("model_model_layers_0_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_0_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3158464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11547136))))[name = string("model_model_layers_0_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_0_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11579968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19968640))))[name = string("model_model_layers_0_mlp_up_proj_weight_palettized")]; tensor model_model_layers_0_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20001472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28390144))))[name = string("model_model_layers_0_mlp_down_proj_weight_palettized")]; tensor model_model_layers_1_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28398400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30495616))))[name = string("model_model_layers_1_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_1_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30503872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31028224))))[name = string("model_model_layers_1_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_1_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31030336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31554688))))[name = string("model_model_layers_1_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_1_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31556800))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39945472))))[name = string("model_model_layers_1_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_1_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39978304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48366976))))[name = string("model_model_layers_1_mlp_up_proj_weight_palettized")]; tensor model_model_layers_1_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48399808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56788480))))[name = string("model_model_layers_1_mlp_down_proj_weight_palettized")]; tensor model_model_layers_2_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56796736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58893952))))[name = string("model_model_layers_2_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_2_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58902208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59426560))))[name = string("model_model_layers_2_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_2_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59428672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59953024))))[name = string("model_model_layers_2_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_2_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59955136))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68343808))))[name = string("model_model_layers_2_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_2_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68376640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76765312))))[name = string("model_model_layers_2_mlp_up_proj_weight_palettized")]; tensor model_model_layers_2_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76798144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85186816))))[name = string("model_model_layers_2_mlp_down_proj_weight_palettized")]; tensor model_model_layers_3_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85195072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87292288))))[name = string("model_model_layers_3_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_3_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87300544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87824896))))[name = string("model_model_layers_3_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_3_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87827008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88351360))))[name = string("model_model_layers_3_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_3_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88353472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96742144))))[name = string("model_model_layers_3_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_3_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96774976))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105163648))))[name = string("model_model_layers_3_mlp_up_proj_weight_palettized")]; tensor model_model_layers_3_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105196480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113585152))))[name = string("model_model_layers_3_mlp_down_proj_weight_palettized")]; tensor model_model_layers_4_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113593408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115690624))))[name = string("model_model_layers_4_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_4_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115698880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116223232))))[name = string("model_model_layers_4_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_4_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116225344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116749696))))[name = string("model_model_layers_4_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_4_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116751808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125140480))))[name = string("model_model_layers_4_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_4_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125173312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133561984))))[name = string("model_model_layers_4_mlp_up_proj_weight_palettized")]; tensor model_model_layers_4_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133594816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141983488))))[name = string("model_model_layers_4_mlp_down_proj_weight_palettized")]; tensor model_model_layers_5_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141991744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144088960))))[name = string("model_model_layers_5_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_5_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144097216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144621568))))[name = string("model_model_layers_5_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_5_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144623680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145148032))))[name = string("model_model_layers_5_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_5_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145150144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153538816))))[name = string("model_model_layers_5_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_5_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153571648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161960320))))[name = string("model_model_layers_5_mlp_up_proj_weight_palettized")]; tensor model_model_layers_5_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161993152))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170381824))))[name = string("model_model_layers_5_mlp_down_proj_weight_palettized")]; tensor model_model_layers_6_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170390080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(172487296))))[name = string("model_model_layers_6_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_6_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(172495552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173019904))))[name = string("model_model_layers_6_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_6_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173022016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173546368))))[name = string("model_model_layers_6_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_6_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173548480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(181937152))))[name = string("model_model_layers_6_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_6_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(181969984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190358656))))[name = string("model_model_layers_6_mlp_up_proj_weight_palettized")]; tensor model_model_layers_6_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190391488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198780160))))[name = string("model_model_layers_6_mlp_down_proj_weight_palettized")]; tensor model_model_layers_7_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198788416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200885632))))[name = string("model_model_layers_7_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_7_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200893888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201418240))))[name = string("model_model_layers_7_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_7_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201420352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201944704))))[name = string("model_model_layers_7_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_7_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201946816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210335488))))[name = string("model_model_layers_7_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_7_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210368320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218756992))))[name = string("model_model_layers_7_mlp_up_proj_weight_palettized")]; tensor model_model_layers_7_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218789824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227178496))))[name = string("model_model_layers_7_mlp_down_proj_weight_palettized")]; int32 var_49 = const()[name = string("op_49"), val = int32(-1)]; int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; tensor greater_equal_0 = greater_equal(x = current_pos, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(131072)]; tensor add_0 = add(x = current_pos, y = slice_by_index_0)[name = string("add_0")]; tensor select_0 = select(a = current_pos, b = add_0, cond = greater_equal_0)[name = string("select_0")]; int32 var_258_axis_0 = const()[name = string("op_258_axis_0"), val = int32(1)]; int32 var_258_batch_dims_0 = const()[name = string("op_258_batch_dims_0"), val = int32(0)]; bool var_258_validate_indices_0 = const()[name = string("op_258_validate_indices_0"), val = bool(false)]; tensor var_54_to_fp16 = const()[name = string("op_54_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227186752)))]; tensor var_258_cast_fp16 = gather(axis = var_258_axis_0, batch_dims = var_258_batch_dims_0, indices = select_0, validate_indices = var_258_validate_indices_0, x = var_54_to_fp16)[name = string("op_258_cast_fp16")]; tensor var_259 = const()[name = string("op_259"), val = tensor([1, 1, 1, -1])]; tensor sin_1_cast_fp16 = reshape(shape = var_259, x = var_258_cast_fp16)[name = string("sin_1_cast_fp16")]; int32 var_263_axis_0 = const()[name = string("op_263_axis_0"), val = int32(1)]; int32 var_263_batch_dims_0 = const()[name = string("op_263_batch_dims_0"), val = int32(0)]; bool var_263_validate_indices_0 = const()[name = string("op_263_validate_indices_0"), val = bool(false)]; tensor var_48_to_fp16 = const()[name = string("op_48_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243964032)))]; tensor var_263_cast_fp16 = gather(axis = var_263_axis_0, batch_dims = var_263_batch_dims_0, indices = select_0, validate_indices = var_263_validate_indices_0, x = var_48_to_fp16)[name = string("op_263_cast_fp16")]; tensor var_264 = const()[name = string("op_264"), val = tensor([1, 1, 1, -1])]; tensor cos_1_cast_fp16 = reshape(shape = var_264, x = var_263_cast_fp16)[name = string("cos_1_cast_fp16")]; tensor mean_1_axes_0 = const()[name = string("mean_1_axes_0"), val = tensor([-1])]; bool mean_1_keep_dims_0 = const()[name = string("mean_1_keep_dims_0"), val = bool(true)]; tensor mean_1_cast_fp16 = reduce_mean(axes = mean_1_axes_0, keep_dims = mean_1_keep_dims_0, x = hidden_states)[name = string("mean_1_cast_fp16")]; tensor input_1_cast_fp16 = sub(x = hidden_states, y = mean_1_cast_fp16)[name = string("input_1_cast_fp16")]; tensor var_272_axes_0 = const()[name = string("op_272_axes_0"), val = tensor([-1])]; tensor model_model_layers_0_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_0_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(260741312)))]; fp16 var_44_to_fp16 = const()[name = string("op_44_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_272_cast_fp16 = layer_norm(axes = var_272_axes_0, epsilon = var_44_to_fp16, gamma = model_model_layers_0_input_layernorm_weight_to_fp16, x = input_1_cast_fp16)[name = string("op_272_cast_fp16")]; tensor var_275 = const()[name = string("op_275"), val = tensor([0, 2, 1])]; tensor var_277_axes_0 = const()[name = string("op_277_axes_0"), val = tensor([2])]; tensor var_276 = transpose(perm = var_275, x = var_272_cast_fp16)[name = string("transpose_31")]; tensor var_277 = expand_dims(axes = var_277_axes_0, x = var_276)[name = string("op_277")]; string var_284_pad_type_0 = const()[name = string("op_284_pad_type_0"), val = string("valid")]; tensor var_284_strides_0 = const()[name = string("op_284_strides_0"), val = tensor([1, 1])]; tensor var_284_pad_0 = const()[name = string("op_284_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_284_dilations_0 = const()[name = string("op_284_dilations_0"), val = tensor([1, 1])]; int32 var_284_groups_0 = const()[name = string("op_284_groups_0"), val = int32(1)]; tensor var_284 = conv(dilations = var_284_dilations_0, groups = var_284_groups_0, pad = var_284_pad_0, pad_type = var_284_pad_type_0, strides = var_284_strides_0, weight = model_model_layers_0_self_attn_q_proj_weight_palettized, x = var_277)[name = string("op_284")]; tensor var_285 = const()[name = string("op_285"), val = tensor([1, 32, 1, 64])]; tensor var_286 = reshape(shape = var_285, x = var_284)[name = string("op_286")]; string var_293_pad_type_0 = const()[name = string("op_293_pad_type_0"), val = string("valid")]; tensor var_293_strides_0 = const()[name = string("op_293_strides_0"), val = tensor([1, 1])]; tensor var_293_pad_0 = const()[name = string("op_293_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_293_dilations_0 = const()[name = string("op_293_dilations_0"), val = tensor([1, 1])]; int32 var_293_groups_0 = const()[name = string("op_293_groups_0"), val = int32(1)]; tensor var_293 = conv(dilations = var_293_dilations_0, groups = var_293_groups_0, pad = var_293_pad_0, pad_type = var_293_pad_type_0, strides = var_293_strides_0, weight = model_model_layers_0_self_attn_k_proj_weight_palettized, x = var_277)[name = string("op_293")]; tensor var_294 = const()[name = string("op_294"), val = tensor([1, 8, 1, 64])]; tensor var_295 = reshape(shape = var_294, x = var_293)[name = string("op_295")]; string var_302_pad_type_0 = const()[name = string("op_302_pad_type_0"), val = string("valid")]; tensor var_302_strides_0 = const()[name = string("op_302_strides_0"), val = tensor([1, 1])]; tensor var_302_pad_0 = const()[name = string("op_302_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_302_dilations_0 = const()[name = string("op_302_dilations_0"), val = tensor([1, 1])]; int32 var_302_groups_0 = const()[name = string("op_302_groups_0"), val = int32(1)]; tensor var_302 = conv(dilations = var_302_dilations_0, groups = var_302_groups_0, pad = var_302_pad_0, pad_type = var_302_pad_type_0, strides = var_302_strides_0, weight = model_model_layers_0_self_attn_v_proj_weight_palettized, x = var_277)[name = string("op_302")]; tensor var_303 = const()[name = string("op_303"), val = tensor([1, 8, 1, 64])]; tensor var_304 = reshape(shape = var_303, x = var_302)[name = string("op_304")]; tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 32, 1, 32])]; tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_1 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = var_286)[name = string("x1_1")]; tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 32, 1, 64])]; tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_1 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = var_286)[name = string("x2_1")]; tensor cos_3_begin_0 = const()[name = string("cos_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor cos_3_end_0 = const()[name = string("cos_3_end_0"), val = tensor([1, 1, 1, 32])]; tensor cos_3_end_mask_0 = const()[name = string("cos_3_end_mask_0"), val = tensor([true, true, true, false])]; tensor cos_3_cast_fp16 = slice_by_index(begin = cos_3_begin_0, end = cos_3_end_0, end_mask = cos_3_end_mask_0, x = cos_1_cast_fp16)[name = string("cos_3_cast_fp16")]; tensor sin_3_begin_0 = const()[name = string("sin_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor sin_3_end_0 = const()[name = string("sin_3_end_0"), val = tensor([1, 1, 1, 32])]; tensor sin_3_end_mask_0 = const()[name = string("sin_3_end_mask_0"), val = tensor([true, true, true, false])]; tensor sin_3_cast_fp16 = slice_by_index(begin = sin_3_begin_0, end = sin_3_end_0, end_mask = sin_3_end_mask_0, x = sin_1_cast_fp16)[name = string("sin_3_cast_fp16")]; tensor var_318_cast_fp16 = mul(x = x1_1, y = cos_3_cast_fp16)[name = string("op_318_cast_fp16")]; tensor var_319_cast_fp16 = mul(x = x2_1, y = sin_3_cast_fp16)[name = string("op_319_cast_fp16")]; tensor var_320_cast_fp16 = sub(x = var_318_cast_fp16, y = var_319_cast_fp16)[name = string("op_320_cast_fp16")]; tensor var_321_cast_fp16 = mul(x = x2_1, y = cos_3_cast_fp16)[name = string("op_321_cast_fp16")]; tensor var_322_cast_fp16 = mul(x = x1_1, y = sin_3_cast_fp16)[name = string("op_322_cast_fp16")]; tensor var_323_cast_fp16 = add(x = var_321_cast_fp16, y = var_322_cast_fp16)[name = string("op_323_cast_fp16")]; bool rotated_1_interleave_0 = const()[name = string("rotated_1_interleave_0"), val = bool(false)]; tensor rotated_1_cast_fp16 = concat(axis = var_49, interleave = rotated_1_interleave_0, values = (var_320_cast_fp16, var_323_cast_fp16))[name = string("rotated_1_cast_fp16")]; tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 8, 1, 32])]; tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_3 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = var_295)[name = string("x1_3")]; tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 8, 1, 64])]; tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_3 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = var_295)[name = string("x2_3")]; tensor var_339_cast_fp16 = mul(x = x1_3, y = cos_3_cast_fp16)[name = string("op_339_cast_fp16")]; tensor var_340_cast_fp16 = mul(x = x2_3, y = sin_3_cast_fp16)[name = string("op_340_cast_fp16")]; tensor var_341_cast_fp16 = sub(x = var_339_cast_fp16, y = var_340_cast_fp16)[name = string("op_341_cast_fp16")]; tensor var_342_cast_fp16 = mul(x = x2_3, y = cos_3_cast_fp16)[name = string("op_342_cast_fp16")]; tensor var_343_cast_fp16 = mul(x = x1_3, y = sin_3_cast_fp16)[name = string("op_343_cast_fp16")]; tensor var_344_cast_fp16 = add(x = var_342_cast_fp16, y = var_343_cast_fp16)[name = string("op_344_cast_fp16")]; bool rotated_3_interleave_0 = const()[name = string("rotated_3_interleave_0"), val = bool(false)]; tensor rotated_3_cast_fp16 = concat(axis = var_49, interleave = rotated_3_interleave_0, values = (var_341_cast_fp16, var_344_cast_fp16))[name = string("rotated_3_cast_fp16")]; int32 var_348 = const()[name = string("op_348"), val = int32(1)]; tensor var_349 = add(x = current_pos, y = var_348)[name = string("op_349")]; tensor read_state_0 = read_state(input = model_model_kv_cache_0)[name = string("read_state_0")]; tensor expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor([0])]; tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor([0])]; tensor expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor([0])]; tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor([1])]; int32 concat_2_axis_0 = const()[name = string("concat_2_axis_0"), val = int32(0)]; bool concat_2_interleave_0 = const()[name = string("concat_2_interleave_0"), val = bool(false)]; tensor concat_2 = concat(axis = concat_2_axis_0, interleave = concat_2_interleave_0, values = (expand_dims_0, expand_dims_1, current_pos, expand_dims_3))[name = string("concat_2")]; tensor concat_3_values1_0 = const()[name = string("concat_3_values1_0"), val = tensor([0])]; tensor concat_3_values3_0 = const()[name = string("concat_3_values3_0"), val = tensor([0])]; int32 concat_3_axis_0 = const()[name = string("concat_3_axis_0"), val = int32(0)]; bool concat_3_interleave_0 = const()[name = string("concat_3_interleave_0"), val = bool(false)]; tensor concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (expand_dims_4, concat_3_values1_0, var_349, concat_3_values3_0))[name = string("concat_3")]; tensor model_model_kv_cache_0_internal_tensor_assign_1_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_1_stride_0, update = rotated_3_cast_fp16, x = read_state_0)[name = string("model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_0_write_state")]; tensor coreml_update_state_16 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_0")]; tensor expand_dims_6 = const()[name = string("expand_dims_6"), val = tensor([16])]; tensor expand_dims_7 = const()[name = string("expand_dims_7"), val = tensor([0])]; tensor expand_dims_9 = const()[name = string("expand_dims_9"), val = tensor([0])]; tensor expand_dims_10 = const()[name = string("expand_dims_10"), val = tensor([17])]; int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)]; bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)]; tensor concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (expand_dims_6, expand_dims_7, current_pos, expand_dims_9))[name = string("concat_6")]; tensor concat_7_values1_0 = const()[name = string("concat_7_values1_0"), val = tensor([0])]; tensor concat_7_values3_0 = const()[name = string("concat_7_values3_0"), val = tensor([0])]; int32 concat_7_axis_0 = const()[name = string("concat_7_axis_0"), val = int32(0)]; bool concat_7_interleave_0 = const()[name = string("concat_7_interleave_0"), val = bool(false)]; tensor concat_7 = concat(axis = concat_7_axis_0, interleave = concat_7_interleave_0, values = (expand_dims_10, concat_7_values1_0, var_349, concat_7_values3_0))[name = string("concat_7")]; tensor model_model_kv_cache_0_internal_tensor_assign_2_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_6, begin_mask = model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0, end = concat_7, end_mask = model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_2_stride_0, update = var_304, x = coreml_update_state_16)[name = string("model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_1_write_state")]; tensor coreml_update_state_17 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_1")]; tensor var_364_begin_0 = const()[name = string("op_364_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_364_end_0 = const()[name = string("op_364_end_0"), val = tensor([1, 8, 2048, 64])]; tensor var_364_end_mask_0 = const()[name = string("op_364_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_364_cast_fp16 = slice_by_index(begin = var_364_begin_0, end = var_364_end_0, end_mask = var_364_end_mask_0, x = coreml_update_state_17)[name = string("op_364_cast_fp16")]; tensor K_layer_cache_1_axes_0 = const()[name = string("K_layer_cache_1_axes_0"), val = tensor([0])]; tensor K_layer_cache_1_cast_fp16 = squeeze(axes = K_layer_cache_1_axes_0, x = var_364_cast_fp16)[name = string("K_layer_cache_1_cast_fp16")]; tensor var_366_begin_0 = const()[name = string("op_366_begin_0"), val = tensor([16, 0, 0, 0])]; tensor var_366_end_0 = const()[name = string("op_366_end_0"), val = tensor([17, 8, 2048, 64])]; tensor var_366_end_mask_0 = const()[name = string("op_366_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_366_cast_fp16 = slice_by_index(begin = var_366_begin_0, end = var_366_end_0, end_mask = var_366_end_mask_0, x = coreml_update_state_17)[name = string("op_366_cast_fp16")]; tensor V_layer_cache_1_axes_0 = const()[name = string("V_layer_cache_1_axes_0"), val = tensor([0])]; tensor V_layer_cache_1_cast_fp16 = squeeze(axes = V_layer_cache_1_axes_0, x = var_366_cast_fp16)[name = string("V_layer_cache_1_cast_fp16")]; tensor x_11_axes_0 = const()[name = string("x_11_axes_0"), val = tensor([1])]; tensor x_11_cast_fp16 = expand_dims(axes = x_11_axes_0, x = K_layer_cache_1_cast_fp16)[name = string("x_11_cast_fp16")]; tensor var_375 = const()[name = string("op_375"), val = tensor([1, 4, 1, 1])]; tensor x_13_cast_fp16 = tile(reps = var_375, x = x_11_cast_fp16)[name = string("x_13_cast_fp16")]; tensor var_379 = const()[name = string("op_379"), val = tensor([1, -1, 2048, 64])]; tensor key_states_3_cast_fp16 = reshape(shape = var_379, x = x_13_cast_fp16)[name = string("key_states_3_cast_fp16")]; tensor x_17_axes_0 = const()[name = string("x_17_axes_0"), val = tensor([1])]; tensor x_17_cast_fp16 = expand_dims(axes = x_17_axes_0, x = V_layer_cache_1_cast_fp16)[name = string("x_17_cast_fp16")]; tensor var_382 = const()[name = string("op_382"), val = tensor([1, 4, 1, 1])]; tensor x_19_cast_fp16 = tile(reps = var_382, x = x_17_cast_fp16)[name = string("x_19_cast_fp16")]; tensor var_386 = const()[name = string("op_386"), val = tensor([1, -1, 2048, 64])]; tensor value_states_3_cast_fp16 = reshape(shape = var_386, x = x_19_cast_fp16)[name = string("value_states_3_cast_fp16")]; bool var_389_transpose_x_1 = const()[name = string("op_389_transpose_x_1"), val = bool(false)]; bool var_389_transpose_y_1 = const()[name = string("op_389_transpose_y_1"), val = bool(true)]; tensor var_389_cast_fp16 = matmul(transpose_x = var_389_transpose_x_1, transpose_y = var_389_transpose_y_1, x = rotated_1_cast_fp16, y = key_states_3_cast_fp16)[name = string("op_389_cast_fp16")]; fp16 var_390_to_fp16 = const()[name = string("op_390_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_1_cast_fp16 = mul(x = var_389_cast_fp16, y = var_390_to_fp16)[name = string("attn_weights_1_cast_fp16")]; tensor x_21_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = causal_mask)[name = string("x_21_cast_fp16")]; tensor reduce_max_0_axes_0 = const()[name = string("reduce_max_0_axes_0"), val = tensor([-1])]; bool reduce_max_0_keep_dims_0 = const()[name = string("reduce_max_0_keep_dims_0"), val = bool(true)]; tensor reduce_max_0_cast_fp16 = reduce_max(axes = reduce_max_0_axes_0, keep_dims = reduce_max_0_keep_dims_0, x = x_21_cast_fp16)[name = string("reduce_max_0_cast_fp16")]; tensor x_23_cast_fp16 = sub(x = x_21_cast_fp16, y = reduce_max_0_cast_fp16)[name = string("x_23_cast_fp16")]; tensor exp_x_1_cast_fp16 = exp(x = x_23_cast_fp16)[name = string("exp_x_1_cast_fp16")]; tensor var_401_axes_0 = const()[name = string("op_401_axes_0"), val = tensor([-1])]; bool var_401_keep_dims_0 = const()[name = string("op_401_keep_dims_0"), val = bool(true)]; tensor var_401_cast_fp16 = reduce_sum(axes = var_401_axes_0, keep_dims = var_401_keep_dims_0, x = exp_x_1_cast_fp16)[name = string("op_401_cast_fp16")]; tensor attn_weights_3_cast_fp16 = real_div(x = exp_x_1_cast_fp16, y = var_401_cast_fp16)[name = string("attn_weights_3_cast_fp16")]; bool attn_output_1_transpose_x_0 = const()[name = string("attn_output_1_transpose_x_0"), val = bool(false)]; bool attn_output_1_transpose_y_0 = const()[name = string("attn_output_1_transpose_y_0"), val = bool(false)]; tensor attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_0, transpose_y = attn_output_1_transpose_y_0, x = attn_weights_3_cast_fp16, y = value_states_3_cast_fp16)[name = string("attn_output_1_cast_fp16")]; tensor var_404_perm_0 = const()[name = string("op_404_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_406 = const()[name = string("op_406"), val = tensor([1, 1, 2048])]; tensor var_404_cast_fp16 = transpose(perm = var_404_perm_0, x = attn_output_1_cast_fp16)[name = string("transpose_30")]; tensor input_5_cast_fp16 = reshape(shape = var_406, x = var_404_cast_fp16)[name = string("input_5_cast_fp16")]; tensor model_model_layers_0_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(260745472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262842688))))[name = string("model_model_layers_0_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262850944)))]; tensor linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_0_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_5_cast_fp16)[name = string("linear_0_cast_fp16")]; tensor hidden_states_5_cast_fp16 = add(x = hidden_states, y = linear_0_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; tensor mean_3_axes_0 = const()[name = string("mean_3_axes_0"), val = tensor([-1])]; bool mean_3_keep_dims_0 = const()[name = string("mean_3_keep_dims_0"), val = bool(true)]; tensor mean_3_cast_fp16 = reduce_mean(axes = mean_3_axes_0, keep_dims = mean_3_keep_dims_0, x = hidden_states_5_cast_fp16)[name = string("mean_3_cast_fp16")]; tensor input_7_cast_fp16 = sub(x = hidden_states_5_cast_fp16, y = mean_3_cast_fp16)[name = string("input_7_cast_fp16")]; tensor var_417_axes_0 = const()[name = string("op_417_axes_0"), val = tensor([-1])]; tensor model_model_layers_0_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_0_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262855104)))]; tensor var_417_cast_fp16 = layer_norm(axes = var_417_axes_0, epsilon = var_44_to_fp16, gamma = model_model_layers_0_post_attention_layernorm_weight_to_fp16, x = input_7_cast_fp16)[name = string("op_417_cast_fp16")]; tensor var_424 = const()[name = string("op_424"), val = tensor([0, 2, 1])]; tensor input_9_axes_0 = const()[name = string("input_9_axes_0"), val = tensor([2])]; tensor var_425 = transpose(perm = var_424, x = var_417_cast_fp16)[name = string("transpose_29")]; tensor input_9 = expand_dims(axes = input_9_axes_0, x = var_425)[name = string("input_9")]; string input_11_pad_type_0 = const()[name = string("input_11_pad_type_0"), val = string("valid")]; tensor input_11_strides_0 = const()[name = string("input_11_strides_0"), val = tensor([1, 1])]; tensor input_11_pad_0 = const()[name = string("input_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_11_dilations_0 = const()[name = string("input_11_dilations_0"), val = tensor([1, 1])]; int32 input_11_groups_0 = const()[name = string("input_11_groups_0"), val = int32(1)]; tensor input_11 = conv(dilations = input_11_dilations_0, groups = input_11_groups_0, pad = input_11_pad_0, pad_type = input_11_pad_type_0, strides = input_11_strides_0, weight = model_model_layers_0_mlp_gate_proj_weight_palettized, x = input_9)[name = string("input_11")]; string up_states_1_pad_type_0 = const()[name = string("up_states_1_pad_type_0"), val = string("valid")]; tensor up_states_1_strides_0 = const()[name = string("up_states_1_strides_0"), val = tensor([1, 1])]; tensor up_states_1_pad_0 = const()[name = string("up_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_1_dilations_0 = const()[name = string("up_states_1_dilations_0"), val = tensor([1, 1])]; int32 up_states_1_groups_0 = const()[name = string("up_states_1_groups_0"), val = int32(1)]; tensor up_states_1 = conv(dilations = up_states_1_dilations_0, groups = up_states_1_groups_0, pad = up_states_1_pad_0, pad_type = up_states_1_pad_type_0, strides = up_states_1_strides_0, weight = model_model_layers_0_mlp_up_proj_weight_palettized, x = input_9)[name = string("up_states_1")]; tensor gate_states_1 = silu(x = input_11)[name = string("gate_states_1")]; tensor input_13 = mul(x = gate_states_1, y = up_states_1)[name = string("input_13")]; string hidden_states_7_pad_type_0 = const()[name = string("hidden_states_7_pad_type_0"), val = string("valid")]; tensor hidden_states_7_strides_0 = const()[name = string("hidden_states_7_strides_0"), val = tensor([1, 1])]; tensor hidden_states_7_pad_0 = const()[name = string("hidden_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_7_dilations_0 = const()[name = string("hidden_states_7_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_7_groups_0 = const()[name = string("hidden_states_7_groups_0"), val = int32(1)]; tensor hidden_states_7 = conv(dilations = hidden_states_7_dilations_0, groups = hidden_states_7_groups_0, pad = hidden_states_7_pad_0, pad_type = hidden_states_7_pad_type_0, strides = hidden_states_7_strides_0, weight = model_model_layers_0_mlp_down_proj_weight_palettized, x = input_13)[name = string("hidden_states_7")]; tensor var_447_axes_0 = const()[name = string("op_447_axes_0"), val = tensor([2])]; tensor var_447 = squeeze(axes = var_447_axes_0, x = hidden_states_7)[name = string("op_447")]; tensor var_448 = const()[name = string("op_448"), val = tensor([0, 2, 1])]; tensor var_449 = transpose(perm = var_448, x = var_447)[name = string("transpose_28")]; tensor hidden_states_9_cast_fp16 = add(x = hidden_states_5_cast_fp16, y = var_449)[name = string("hidden_states_9_cast_fp16")]; tensor mean_5_axes_0 = const()[name = string("mean_5_axes_0"), val = tensor([-1])]; bool mean_5_keep_dims_0 = const()[name = string("mean_5_keep_dims_0"), val = bool(true)]; tensor mean_5_cast_fp16 = reduce_mean(axes = mean_5_axes_0, keep_dims = mean_5_keep_dims_0, x = hidden_states_9_cast_fp16)[name = string("mean_5_cast_fp16")]; tensor input_15_cast_fp16 = sub(x = hidden_states_9_cast_fp16, y = mean_5_cast_fp16)[name = string("input_15_cast_fp16")]; tensor var_457_axes_0 = const()[name = string("op_457_axes_0"), val = tensor([-1])]; tensor model_model_layers_1_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_1_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262859264)))]; tensor var_457_cast_fp16 = layer_norm(axes = var_457_axes_0, epsilon = var_44_to_fp16, gamma = model_model_layers_1_input_layernorm_weight_to_fp16, x = input_15_cast_fp16)[name = string("op_457_cast_fp16")]; tensor var_460 = const()[name = string("op_460"), val = tensor([0, 2, 1])]; tensor var_462_axes_0 = const()[name = string("op_462_axes_0"), val = tensor([2])]; tensor var_461 = transpose(perm = var_460, x = var_457_cast_fp16)[name = string("transpose_27")]; tensor var_462 = expand_dims(axes = var_462_axes_0, x = var_461)[name = string("op_462")]; string var_469_pad_type_0 = const()[name = string("op_469_pad_type_0"), val = string("valid")]; tensor var_469_strides_0 = const()[name = string("op_469_strides_0"), val = tensor([1, 1])]; tensor var_469_pad_0 = const()[name = string("op_469_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_469_dilations_0 = const()[name = string("op_469_dilations_0"), val = tensor([1, 1])]; int32 var_469_groups_0 = const()[name = string("op_469_groups_0"), val = int32(1)]; tensor var_469 = conv(dilations = var_469_dilations_0, groups = var_469_groups_0, pad = var_469_pad_0, pad_type = var_469_pad_type_0, strides = var_469_strides_0, weight = model_model_layers_1_self_attn_q_proj_weight_palettized, x = var_462)[name = string("op_469")]; tensor var_470 = const()[name = string("op_470"), val = tensor([1, 32, 1, 64])]; tensor var_471 = reshape(shape = var_470, x = var_469)[name = string("op_471")]; string var_478_pad_type_0 = const()[name = string("op_478_pad_type_0"), val = string("valid")]; tensor var_478_strides_0 = const()[name = string("op_478_strides_0"), val = tensor([1, 1])]; tensor var_478_pad_0 = const()[name = string("op_478_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_478_dilations_0 = const()[name = string("op_478_dilations_0"), val = tensor([1, 1])]; int32 var_478_groups_0 = const()[name = string("op_478_groups_0"), val = int32(1)]; tensor var_478 = conv(dilations = var_478_dilations_0, groups = var_478_groups_0, pad = var_478_pad_0, pad_type = var_478_pad_type_0, strides = var_478_strides_0, weight = model_model_layers_1_self_attn_k_proj_weight_palettized, x = var_462)[name = string("op_478")]; tensor var_479 = const()[name = string("op_479"), val = tensor([1, 8, 1, 64])]; tensor var_480 = reshape(shape = var_479, x = var_478)[name = string("op_480")]; string var_487_pad_type_0 = const()[name = string("op_487_pad_type_0"), val = string("valid")]; tensor var_487_strides_0 = const()[name = string("op_487_strides_0"), val = tensor([1, 1])]; tensor var_487_pad_0 = const()[name = string("op_487_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_487_dilations_0 = const()[name = string("op_487_dilations_0"), val = tensor([1, 1])]; int32 var_487_groups_0 = const()[name = string("op_487_groups_0"), val = int32(1)]; tensor var_487 = conv(dilations = var_487_dilations_0, groups = var_487_groups_0, pad = var_487_pad_0, pad_type = var_487_pad_type_0, strides = var_487_strides_0, weight = model_model_layers_1_self_attn_v_proj_weight_palettized, x = var_462)[name = string("op_487")]; tensor var_488 = const()[name = string("op_488"), val = tensor([1, 8, 1, 64])]; tensor var_489 = reshape(shape = var_488, x = var_487)[name = string("op_489")]; tensor x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor([1, 32, 1, 32])]; tensor x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_5 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = var_471)[name = string("x1_5")]; tensor x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor([1, 32, 1, 64])]; tensor x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_5 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = var_471)[name = string("x2_5")]; tensor var_503_cast_fp16 = mul(x = x1_5, y = cos_3_cast_fp16)[name = string("op_503_cast_fp16")]; tensor var_504_cast_fp16 = mul(x = x2_5, y = sin_3_cast_fp16)[name = string("op_504_cast_fp16")]; tensor var_505_cast_fp16 = sub(x = var_503_cast_fp16, y = var_504_cast_fp16)[name = string("op_505_cast_fp16")]; tensor var_506_cast_fp16 = mul(x = x2_5, y = cos_3_cast_fp16)[name = string("op_506_cast_fp16")]; tensor var_507_cast_fp16 = mul(x = x1_5, y = sin_3_cast_fp16)[name = string("op_507_cast_fp16")]; tensor var_508_cast_fp16 = add(x = var_506_cast_fp16, y = var_507_cast_fp16)[name = string("op_508_cast_fp16")]; bool rotated_5_interleave_0 = const()[name = string("rotated_5_interleave_0"), val = bool(false)]; tensor rotated_5_cast_fp16 = concat(axis = var_49, interleave = rotated_5_interleave_0, values = (var_505_cast_fp16, var_508_cast_fp16))[name = string("rotated_5_cast_fp16")]; tensor x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor([1, 8, 1, 32])]; tensor x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_7 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = var_480)[name = string("x1_7")]; tensor x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor([1, 8, 1, 64])]; tensor x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_7 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = var_480)[name = string("x2_7")]; tensor var_524_cast_fp16 = mul(x = x1_7, y = cos_3_cast_fp16)[name = string("op_524_cast_fp16")]; tensor var_525_cast_fp16 = mul(x = x2_7, y = sin_3_cast_fp16)[name = string("op_525_cast_fp16")]; tensor var_526_cast_fp16 = sub(x = var_524_cast_fp16, y = var_525_cast_fp16)[name = string("op_526_cast_fp16")]; tensor var_527_cast_fp16 = mul(x = x2_7, y = cos_3_cast_fp16)[name = string("op_527_cast_fp16")]; tensor var_528_cast_fp16 = mul(x = x1_7, y = sin_3_cast_fp16)[name = string("op_528_cast_fp16")]; tensor var_529_cast_fp16 = add(x = var_527_cast_fp16, y = var_528_cast_fp16)[name = string("op_529_cast_fp16")]; bool rotated_7_interleave_0 = const()[name = string("rotated_7_interleave_0"), val = bool(false)]; tensor rotated_7_cast_fp16 = concat(axis = var_49, interleave = rotated_7_interleave_0, values = (var_526_cast_fp16, var_529_cast_fp16))[name = string("rotated_7_cast_fp16")]; tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([1])]; tensor expand_dims_13 = const()[name = string("expand_dims_13"), val = tensor([0])]; tensor expand_dims_15 = const()[name = string("expand_dims_15"), val = tensor([0])]; tensor expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor([2])]; int32 concat_10_axis_0 = const()[name = string("concat_10_axis_0"), val = int32(0)]; bool concat_10_interleave_0 = const()[name = string("concat_10_interleave_0"), val = bool(false)]; tensor concat_10 = concat(axis = concat_10_axis_0, interleave = concat_10_interleave_0, values = (expand_dims_12, expand_dims_13, current_pos, expand_dims_15))[name = string("concat_10")]; tensor concat_11_values1_0 = const()[name = string("concat_11_values1_0"), val = tensor([0])]; tensor concat_11_values3_0 = const()[name = string("concat_11_values3_0"), val = tensor([0])]; int32 concat_11_axis_0 = const()[name = string("concat_11_axis_0"), val = int32(0)]; bool concat_11_interleave_0 = const()[name = string("concat_11_interleave_0"), val = bool(false)]; tensor concat_11 = concat(axis = concat_11_axis_0, interleave = concat_11_interleave_0, values = (expand_dims_16, concat_11_values1_0, var_349, concat_11_values3_0))[name = string("concat_11")]; tensor model_model_kv_cache_0_internal_tensor_assign_3_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_10, begin_mask = model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0, end = concat_11, end_mask = model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_3_stride_0, update = rotated_7_cast_fp16, x = coreml_update_state_17)[name = string("model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_2_write_state")]; tensor coreml_update_state_18 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_2")]; tensor expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor([17])]; tensor expand_dims_19 = const()[name = string("expand_dims_19"), val = tensor([0])]; tensor expand_dims_21 = const()[name = string("expand_dims_21"), val = tensor([0])]; tensor expand_dims_22 = const()[name = string("expand_dims_22"), val = tensor([18])]; int32 concat_14_axis_0 = const()[name = string("concat_14_axis_0"), val = int32(0)]; bool concat_14_interleave_0 = const()[name = string("concat_14_interleave_0"), val = bool(false)]; tensor concat_14 = concat(axis = concat_14_axis_0, interleave = concat_14_interleave_0, values = (expand_dims_18, expand_dims_19, current_pos, expand_dims_21))[name = string("concat_14")]; tensor concat_15_values1_0 = const()[name = string("concat_15_values1_0"), val = tensor([0])]; tensor concat_15_values3_0 = const()[name = string("concat_15_values3_0"), val = tensor([0])]; int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)]; bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)]; tensor concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (expand_dims_22, concat_15_values1_0, var_349, concat_15_values3_0))[name = string("concat_15")]; tensor model_model_kv_cache_0_internal_tensor_assign_4_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_14, begin_mask = model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0, end = concat_15, end_mask = model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_4_stride_0, update = var_489, x = coreml_update_state_18)[name = string("model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_3_write_state")]; tensor coreml_update_state_19 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_3")]; tensor var_549_begin_0 = const()[name = string("op_549_begin_0"), val = tensor([1, 0, 0, 0])]; tensor var_549_end_0 = const()[name = string("op_549_end_0"), val = tensor([2, 8, 2048, 64])]; tensor var_549_end_mask_0 = const()[name = string("op_549_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_549_cast_fp16 = slice_by_index(begin = var_549_begin_0, end = var_549_end_0, end_mask = var_549_end_mask_0, x = coreml_update_state_19)[name = string("op_549_cast_fp16")]; tensor K_layer_cache_3_axes_0 = const()[name = string("K_layer_cache_3_axes_0"), val = tensor([0])]; tensor K_layer_cache_3_cast_fp16 = squeeze(axes = K_layer_cache_3_axes_0, x = var_549_cast_fp16)[name = string("K_layer_cache_3_cast_fp16")]; tensor var_551_begin_0 = const()[name = string("op_551_begin_0"), val = tensor([17, 0, 0, 0])]; tensor var_551_end_0 = const()[name = string("op_551_end_0"), val = tensor([18, 8, 2048, 64])]; tensor var_551_end_mask_0 = const()[name = string("op_551_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_551_cast_fp16 = slice_by_index(begin = var_551_begin_0, end = var_551_end_0, end_mask = var_551_end_mask_0, x = coreml_update_state_19)[name = string("op_551_cast_fp16")]; tensor V_layer_cache_3_axes_0 = const()[name = string("V_layer_cache_3_axes_0"), val = tensor([0])]; tensor V_layer_cache_3_cast_fp16 = squeeze(axes = V_layer_cache_3_axes_0, x = var_551_cast_fp16)[name = string("V_layer_cache_3_cast_fp16")]; tensor x_39_axes_0 = const()[name = string("x_39_axes_0"), val = tensor([1])]; tensor x_39_cast_fp16 = expand_dims(axes = x_39_axes_0, x = K_layer_cache_3_cast_fp16)[name = string("x_39_cast_fp16")]; tensor var_560 = const()[name = string("op_560"), val = tensor([1, 4, 1, 1])]; tensor x_41_cast_fp16 = tile(reps = var_560, x = x_39_cast_fp16)[name = string("x_41_cast_fp16")]; tensor var_564 = const()[name = string("op_564"), val = tensor([1, -1, 2048, 64])]; tensor key_states_7_cast_fp16 = reshape(shape = var_564, x = x_41_cast_fp16)[name = string("key_states_7_cast_fp16")]; tensor x_45_axes_0 = const()[name = string("x_45_axes_0"), val = tensor([1])]; tensor x_45_cast_fp16 = expand_dims(axes = x_45_axes_0, x = V_layer_cache_3_cast_fp16)[name = string("x_45_cast_fp16")]; tensor var_567 = const()[name = string("op_567"), val = tensor([1, 4, 1, 1])]; tensor x_47_cast_fp16 = tile(reps = var_567, x = x_45_cast_fp16)[name = string("x_47_cast_fp16")]; tensor var_571 = const()[name = string("op_571"), val = tensor([1, -1, 2048, 64])]; tensor value_states_7_cast_fp16 = reshape(shape = var_571, x = x_47_cast_fp16)[name = string("value_states_7_cast_fp16")]; bool var_574_transpose_x_1 = const()[name = string("op_574_transpose_x_1"), val = bool(false)]; bool var_574_transpose_y_1 = const()[name = string("op_574_transpose_y_1"), val = bool(true)]; tensor var_574_cast_fp16 = matmul(transpose_x = var_574_transpose_x_1, transpose_y = var_574_transpose_y_1, x = rotated_5_cast_fp16, y = key_states_7_cast_fp16)[name = string("op_574_cast_fp16")]; fp16 var_575_to_fp16 = const()[name = string("op_575_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_5_cast_fp16 = mul(x = var_574_cast_fp16, y = var_575_to_fp16)[name = string("attn_weights_5_cast_fp16")]; tensor x_49_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = causal_mask)[name = string("x_49_cast_fp16")]; tensor reduce_max_1_axes_0 = const()[name = string("reduce_max_1_axes_0"), val = tensor([-1])]; bool reduce_max_1_keep_dims_0 = const()[name = string("reduce_max_1_keep_dims_0"), val = bool(true)]; tensor reduce_max_1_cast_fp16 = reduce_max(axes = reduce_max_1_axes_0, keep_dims = reduce_max_1_keep_dims_0, x = x_49_cast_fp16)[name = string("reduce_max_1_cast_fp16")]; tensor x_51_cast_fp16 = sub(x = x_49_cast_fp16, y = reduce_max_1_cast_fp16)[name = string("x_51_cast_fp16")]; tensor exp_x_3_cast_fp16 = exp(x = x_51_cast_fp16)[name = string("exp_x_3_cast_fp16")]; tensor var_586_axes_0 = const()[name = string("op_586_axes_0"), val = tensor([-1])]; bool var_586_keep_dims_0 = const()[name = string("op_586_keep_dims_0"), val = bool(true)]; tensor var_586_cast_fp16 = reduce_sum(axes = var_586_axes_0, keep_dims = var_586_keep_dims_0, x = exp_x_3_cast_fp16)[name = string("op_586_cast_fp16")]; tensor attn_weights_7_cast_fp16 = real_div(x = exp_x_3_cast_fp16, y = var_586_cast_fp16)[name = string("attn_weights_7_cast_fp16")]; bool attn_output_7_transpose_x_0 = const()[name = string("attn_output_7_transpose_x_0"), val = bool(false)]; bool attn_output_7_transpose_y_0 = const()[name = string("attn_output_7_transpose_y_0"), val = bool(false)]; tensor attn_output_7_cast_fp16 = matmul(transpose_x = attn_output_7_transpose_x_0, transpose_y = attn_output_7_transpose_y_0, x = attn_weights_7_cast_fp16, y = value_states_7_cast_fp16)[name = string("attn_output_7_cast_fp16")]; tensor var_589_perm_0 = const()[name = string("op_589_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_591 = const()[name = string("op_591"), val = tensor([1, 1, 2048])]; tensor var_589_cast_fp16 = transpose(perm = var_589_perm_0, x = attn_output_7_cast_fp16)[name = string("transpose_26")]; tensor input_19_cast_fp16 = reshape(shape = var_591, x = var_589_cast_fp16)[name = string("input_19_cast_fp16")]; tensor model_model_layers_1_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262863424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(264960640))))[name = string("model_model_layers_1_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_1_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_1_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_19_cast_fp16)[name = string("linear_1_cast_fp16")]; tensor hidden_states_13_cast_fp16 = add(x = hidden_states_9_cast_fp16, y = linear_1_cast_fp16)[name = string("hidden_states_13_cast_fp16")]; tensor mean_7_axes_0 = const()[name = string("mean_7_axes_0"), val = tensor([-1])]; bool mean_7_keep_dims_0 = const()[name = string("mean_7_keep_dims_0"), val = bool(true)]; tensor mean_7_cast_fp16 = reduce_mean(axes = mean_7_axes_0, keep_dims = mean_7_keep_dims_0, x = hidden_states_13_cast_fp16)[name = string("mean_7_cast_fp16")]; tensor input_21_cast_fp16 = sub(x = hidden_states_13_cast_fp16, y = mean_7_cast_fp16)[name = string("input_21_cast_fp16")]; tensor var_602_axes_0 = const()[name = string("op_602_axes_0"), val = tensor([-1])]; tensor model_model_layers_1_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_1_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(264968896)))]; tensor var_602_cast_fp16 = layer_norm(axes = var_602_axes_0, epsilon = var_44_to_fp16, gamma = model_model_layers_1_post_attention_layernorm_weight_to_fp16, x = input_21_cast_fp16)[name = string("op_602_cast_fp16")]; tensor var_609 = const()[name = string("op_609"), val = tensor([0, 2, 1])]; tensor input_23_axes_0 = const()[name = string("input_23_axes_0"), val = tensor([2])]; tensor var_610 = transpose(perm = var_609, x = var_602_cast_fp16)[name = string("transpose_25")]; tensor input_23 = expand_dims(axes = input_23_axes_0, x = var_610)[name = string("input_23")]; string input_25_pad_type_0 = const()[name = string("input_25_pad_type_0"), val = string("valid")]; tensor input_25_strides_0 = const()[name = string("input_25_strides_0"), val = tensor([1, 1])]; tensor input_25_pad_0 = const()[name = string("input_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_25_dilations_0 = const()[name = string("input_25_dilations_0"), val = tensor([1, 1])]; int32 input_25_groups_0 = const()[name = string("input_25_groups_0"), val = int32(1)]; tensor input_25 = conv(dilations = input_25_dilations_0, groups = input_25_groups_0, pad = input_25_pad_0, pad_type = input_25_pad_type_0, strides = input_25_strides_0, weight = model_model_layers_1_mlp_gate_proj_weight_palettized, x = input_23)[name = string("input_25")]; string up_states_3_pad_type_0 = const()[name = string("up_states_3_pad_type_0"), val = string("valid")]; tensor up_states_3_strides_0 = const()[name = string("up_states_3_strides_0"), val = tensor([1, 1])]; tensor up_states_3_pad_0 = const()[name = string("up_states_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_3_dilations_0 = const()[name = string("up_states_3_dilations_0"), val = tensor([1, 1])]; int32 up_states_3_groups_0 = const()[name = string("up_states_3_groups_0"), val = int32(1)]; tensor up_states_3 = conv(dilations = up_states_3_dilations_0, groups = up_states_3_groups_0, pad = up_states_3_pad_0, pad_type = up_states_3_pad_type_0, strides = up_states_3_strides_0, weight = model_model_layers_1_mlp_up_proj_weight_palettized, x = input_23)[name = string("up_states_3")]; tensor gate_states_3 = silu(x = input_25)[name = string("gate_states_3")]; tensor input_27 = mul(x = gate_states_3, y = up_states_3)[name = string("input_27")]; string hidden_states_15_pad_type_0 = const()[name = string("hidden_states_15_pad_type_0"), val = string("valid")]; tensor hidden_states_15_strides_0 = const()[name = string("hidden_states_15_strides_0"), val = tensor([1, 1])]; tensor hidden_states_15_pad_0 = const()[name = string("hidden_states_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_15_dilations_0 = const()[name = string("hidden_states_15_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_15_groups_0 = const()[name = string("hidden_states_15_groups_0"), val = int32(1)]; tensor hidden_states_15 = conv(dilations = hidden_states_15_dilations_0, groups = hidden_states_15_groups_0, pad = hidden_states_15_pad_0, pad_type = hidden_states_15_pad_type_0, strides = hidden_states_15_strides_0, weight = model_model_layers_1_mlp_down_proj_weight_palettized, x = input_27)[name = string("hidden_states_15")]; tensor var_632_axes_0 = const()[name = string("op_632_axes_0"), val = tensor([2])]; tensor var_632 = squeeze(axes = var_632_axes_0, x = hidden_states_15)[name = string("op_632")]; tensor var_633 = const()[name = string("op_633"), val = tensor([0, 2, 1])]; tensor var_634 = transpose(perm = var_633, x = var_632)[name = string("transpose_24")]; tensor hidden_states_17_cast_fp16 = add(x = hidden_states_13_cast_fp16, y = var_634)[name = string("hidden_states_17_cast_fp16")]; tensor mean_9_axes_0 = const()[name = string("mean_9_axes_0"), val = tensor([-1])]; bool mean_9_keep_dims_0 = const()[name = string("mean_9_keep_dims_0"), val = bool(true)]; tensor mean_9_cast_fp16 = reduce_mean(axes = mean_9_axes_0, keep_dims = mean_9_keep_dims_0, x = hidden_states_17_cast_fp16)[name = string("mean_9_cast_fp16")]; tensor input_29_cast_fp16 = sub(x = hidden_states_17_cast_fp16, y = mean_9_cast_fp16)[name = string("input_29_cast_fp16")]; tensor var_642_axes_0 = const()[name = string("op_642_axes_0"), val = tensor([-1])]; tensor model_model_layers_2_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_2_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(264973056)))]; tensor var_642_cast_fp16 = layer_norm(axes = var_642_axes_0, epsilon = var_44_to_fp16, gamma = model_model_layers_2_input_layernorm_weight_to_fp16, x = input_29_cast_fp16)[name = string("op_642_cast_fp16")]; tensor var_645 = const()[name = string("op_645"), val = tensor([0, 2, 1])]; tensor var_647_axes_0 = const()[name = string("op_647_axes_0"), val = tensor([2])]; tensor var_646 = transpose(perm = var_645, x = var_642_cast_fp16)[name = string("transpose_23")]; tensor var_647 = expand_dims(axes = var_647_axes_0, x = var_646)[name = string("op_647")]; string var_654_pad_type_0 = const()[name = string("op_654_pad_type_0"), val = string("valid")]; tensor var_654_strides_0 = const()[name = string("op_654_strides_0"), val = tensor([1, 1])]; tensor var_654_pad_0 = const()[name = string("op_654_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_654_dilations_0 = const()[name = string("op_654_dilations_0"), val = tensor([1, 1])]; int32 var_654_groups_0 = const()[name = string("op_654_groups_0"), val = int32(1)]; tensor var_654 = conv(dilations = var_654_dilations_0, groups = var_654_groups_0, pad = var_654_pad_0, pad_type = var_654_pad_type_0, strides = var_654_strides_0, weight = model_model_layers_2_self_attn_q_proj_weight_palettized, x = var_647)[name = string("op_654")]; tensor var_655 = const()[name = string("op_655"), val = tensor([1, 32, 1, 64])]; tensor var_656 = reshape(shape = var_655, x = var_654)[name = string("op_656")]; string var_663_pad_type_0 = const()[name = string("op_663_pad_type_0"), val = string("valid")]; tensor var_663_strides_0 = const()[name = string("op_663_strides_0"), val = tensor([1, 1])]; tensor var_663_pad_0 = const()[name = string("op_663_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_663_dilations_0 = const()[name = string("op_663_dilations_0"), val = tensor([1, 1])]; int32 var_663_groups_0 = const()[name = string("op_663_groups_0"), val = int32(1)]; tensor var_663 = conv(dilations = var_663_dilations_0, groups = var_663_groups_0, pad = var_663_pad_0, pad_type = var_663_pad_type_0, strides = var_663_strides_0, weight = model_model_layers_2_self_attn_k_proj_weight_palettized, x = var_647)[name = string("op_663")]; tensor var_664 = const()[name = string("op_664"), val = tensor([1, 8, 1, 64])]; tensor var_665 = reshape(shape = var_664, x = var_663)[name = string("op_665")]; string var_672_pad_type_0 = const()[name = string("op_672_pad_type_0"), val = string("valid")]; tensor var_672_strides_0 = const()[name = string("op_672_strides_0"), val = tensor([1, 1])]; tensor var_672_pad_0 = const()[name = string("op_672_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_672_dilations_0 = const()[name = string("op_672_dilations_0"), val = tensor([1, 1])]; int32 var_672_groups_0 = const()[name = string("op_672_groups_0"), val = int32(1)]; tensor var_672 = conv(dilations = var_672_dilations_0, groups = var_672_groups_0, pad = var_672_pad_0, pad_type = var_672_pad_type_0, strides = var_672_strides_0, weight = model_model_layers_2_self_attn_v_proj_weight_palettized, x = var_647)[name = string("op_672")]; tensor var_673 = const()[name = string("op_673"), val = tensor([1, 8, 1, 64])]; tensor var_674 = reshape(shape = var_673, x = var_672)[name = string("op_674")]; tensor x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor([1, 32, 1, 32])]; tensor x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_9 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = var_656)[name = string("x1_9")]; tensor x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor([1, 32, 1, 64])]; tensor x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_9 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = var_656)[name = string("x2_9")]; tensor var_688_cast_fp16 = mul(x = x1_9, y = cos_3_cast_fp16)[name = string("op_688_cast_fp16")]; tensor var_689_cast_fp16 = mul(x = x2_9, y = sin_3_cast_fp16)[name = string("op_689_cast_fp16")]; tensor var_690_cast_fp16 = sub(x = var_688_cast_fp16, y = var_689_cast_fp16)[name = string("op_690_cast_fp16")]; tensor var_691_cast_fp16 = mul(x = x2_9, y = cos_3_cast_fp16)[name = string("op_691_cast_fp16")]; tensor var_692_cast_fp16 = mul(x = x1_9, y = sin_3_cast_fp16)[name = string("op_692_cast_fp16")]; tensor var_693_cast_fp16 = add(x = var_691_cast_fp16, y = var_692_cast_fp16)[name = string("op_693_cast_fp16")]; bool rotated_9_interleave_0 = const()[name = string("rotated_9_interleave_0"), val = bool(false)]; tensor rotated_9_cast_fp16 = concat(axis = var_49, interleave = rotated_9_interleave_0, values = (var_690_cast_fp16, var_693_cast_fp16))[name = string("rotated_9_cast_fp16")]; tensor x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor([1, 8, 1, 32])]; tensor x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_11 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = var_665)[name = string("x1_11")]; tensor x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor([1, 8, 1, 64])]; tensor x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_11 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = var_665)[name = string("x2_11")]; tensor var_709_cast_fp16 = mul(x = x1_11, y = cos_3_cast_fp16)[name = string("op_709_cast_fp16")]; tensor var_710_cast_fp16 = mul(x = x2_11, y = sin_3_cast_fp16)[name = string("op_710_cast_fp16")]; tensor var_711_cast_fp16 = sub(x = var_709_cast_fp16, y = var_710_cast_fp16)[name = string("op_711_cast_fp16")]; tensor var_712_cast_fp16 = mul(x = x2_11, y = cos_3_cast_fp16)[name = string("op_712_cast_fp16")]; tensor var_713_cast_fp16 = mul(x = x1_11, y = sin_3_cast_fp16)[name = string("op_713_cast_fp16")]; tensor var_714_cast_fp16 = add(x = var_712_cast_fp16, y = var_713_cast_fp16)[name = string("op_714_cast_fp16")]; bool rotated_11_interleave_0 = const()[name = string("rotated_11_interleave_0"), val = bool(false)]; tensor rotated_11_cast_fp16 = concat(axis = var_49, interleave = rotated_11_interleave_0, values = (var_711_cast_fp16, var_714_cast_fp16))[name = string("rotated_11_cast_fp16")]; tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([2])]; tensor expand_dims_25 = const()[name = string("expand_dims_25"), val = tensor([0])]; tensor expand_dims_27 = const()[name = string("expand_dims_27"), val = tensor([0])]; tensor expand_dims_28 = const()[name = string("expand_dims_28"), val = tensor([3])]; int32 concat_18_axis_0 = const()[name = string("concat_18_axis_0"), val = int32(0)]; bool concat_18_interleave_0 = const()[name = string("concat_18_interleave_0"), val = bool(false)]; tensor concat_18 = concat(axis = concat_18_axis_0, interleave = concat_18_interleave_0, values = (expand_dims_24, expand_dims_25, current_pos, expand_dims_27))[name = string("concat_18")]; tensor concat_19_values1_0 = const()[name = string("concat_19_values1_0"), val = tensor([0])]; tensor concat_19_values3_0 = const()[name = string("concat_19_values3_0"), val = tensor([0])]; int32 concat_19_axis_0 = const()[name = string("concat_19_axis_0"), val = int32(0)]; bool concat_19_interleave_0 = const()[name = string("concat_19_interleave_0"), val = bool(false)]; tensor concat_19 = concat(axis = concat_19_axis_0, interleave = concat_19_interleave_0, values = (expand_dims_28, concat_19_values1_0, var_349, concat_19_values3_0))[name = string("concat_19")]; tensor model_model_kv_cache_0_internal_tensor_assign_5_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_18, begin_mask = model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0, end = concat_19, end_mask = model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_5_stride_0, update = rotated_11_cast_fp16, x = coreml_update_state_19)[name = string("model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_4_write_state")]; tensor coreml_update_state_20 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_4")]; tensor expand_dims_30 = const()[name = string("expand_dims_30"), val = tensor([18])]; tensor expand_dims_31 = const()[name = string("expand_dims_31"), val = tensor([0])]; tensor expand_dims_33 = const()[name = string("expand_dims_33"), val = tensor([0])]; tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([19])]; int32 concat_22_axis_0 = const()[name = string("concat_22_axis_0"), val = int32(0)]; bool concat_22_interleave_0 = const()[name = string("concat_22_interleave_0"), val = bool(false)]; tensor concat_22 = concat(axis = concat_22_axis_0, interleave = concat_22_interleave_0, values = (expand_dims_30, expand_dims_31, current_pos, expand_dims_33))[name = string("concat_22")]; tensor concat_23_values1_0 = const()[name = string("concat_23_values1_0"), val = tensor([0])]; tensor concat_23_values3_0 = const()[name = string("concat_23_values3_0"), val = tensor([0])]; int32 concat_23_axis_0 = const()[name = string("concat_23_axis_0"), val = int32(0)]; bool concat_23_interleave_0 = const()[name = string("concat_23_interleave_0"), val = bool(false)]; tensor concat_23 = concat(axis = concat_23_axis_0, interleave = concat_23_interleave_0, values = (expand_dims_34, concat_23_values1_0, var_349, concat_23_values3_0))[name = string("concat_23")]; tensor model_model_kv_cache_0_internal_tensor_assign_6_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_22, begin_mask = model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0, end = concat_23, end_mask = model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_6_stride_0, update = var_674, x = coreml_update_state_20)[name = string("model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_5_write_state")]; tensor coreml_update_state_21 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_5")]; tensor var_734_begin_0 = const()[name = string("op_734_begin_0"), val = tensor([2, 0, 0, 0])]; tensor var_734_end_0 = const()[name = string("op_734_end_0"), val = tensor([3, 8, 2048, 64])]; tensor var_734_end_mask_0 = const()[name = string("op_734_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_734_cast_fp16 = slice_by_index(begin = var_734_begin_0, end = var_734_end_0, end_mask = var_734_end_mask_0, x = coreml_update_state_21)[name = string("op_734_cast_fp16")]; tensor K_layer_cache_5_axes_0 = const()[name = string("K_layer_cache_5_axes_0"), val = tensor([0])]; tensor K_layer_cache_5_cast_fp16 = squeeze(axes = K_layer_cache_5_axes_0, x = var_734_cast_fp16)[name = string("K_layer_cache_5_cast_fp16")]; tensor var_736_begin_0 = const()[name = string("op_736_begin_0"), val = tensor([18, 0, 0, 0])]; tensor var_736_end_0 = const()[name = string("op_736_end_0"), val = tensor([19, 8, 2048, 64])]; tensor var_736_end_mask_0 = const()[name = string("op_736_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_736_cast_fp16 = slice_by_index(begin = var_736_begin_0, end = var_736_end_0, end_mask = var_736_end_mask_0, x = coreml_update_state_21)[name = string("op_736_cast_fp16")]; tensor V_layer_cache_5_axes_0 = const()[name = string("V_layer_cache_5_axes_0"), val = tensor([0])]; tensor V_layer_cache_5_cast_fp16 = squeeze(axes = V_layer_cache_5_axes_0, x = var_736_cast_fp16)[name = string("V_layer_cache_5_cast_fp16")]; tensor x_67_axes_0 = const()[name = string("x_67_axes_0"), val = tensor([1])]; tensor x_67_cast_fp16 = expand_dims(axes = x_67_axes_0, x = K_layer_cache_5_cast_fp16)[name = string("x_67_cast_fp16")]; tensor var_745 = const()[name = string("op_745"), val = tensor([1, 4, 1, 1])]; tensor x_69_cast_fp16 = tile(reps = var_745, x = x_67_cast_fp16)[name = string("x_69_cast_fp16")]; tensor var_749 = const()[name = string("op_749"), val = tensor([1, -1, 2048, 64])]; tensor key_states_11_cast_fp16 = reshape(shape = var_749, x = x_69_cast_fp16)[name = string("key_states_11_cast_fp16")]; tensor x_73_axes_0 = const()[name = string("x_73_axes_0"), val = tensor([1])]; tensor x_73_cast_fp16 = expand_dims(axes = x_73_axes_0, x = V_layer_cache_5_cast_fp16)[name = string("x_73_cast_fp16")]; tensor var_752 = const()[name = string("op_752"), val = tensor([1, 4, 1, 1])]; tensor x_75_cast_fp16 = tile(reps = var_752, x = x_73_cast_fp16)[name = string("x_75_cast_fp16")]; tensor var_756 = const()[name = string("op_756"), val = tensor([1, -1, 2048, 64])]; tensor value_states_11_cast_fp16 = reshape(shape = var_756, x = x_75_cast_fp16)[name = string("value_states_11_cast_fp16")]; bool var_759_transpose_x_1 = const()[name = string("op_759_transpose_x_1"), val = bool(false)]; bool var_759_transpose_y_1 = const()[name = string("op_759_transpose_y_1"), val = bool(true)]; tensor var_759_cast_fp16 = matmul(transpose_x = var_759_transpose_x_1, transpose_y = var_759_transpose_y_1, x = rotated_9_cast_fp16, y = key_states_11_cast_fp16)[name = string("op_759_cast_fp16")]; fp16 var_760_to_fp16 = const()[name = string("op_760_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_9_cast_fp16 = mul(x = var_759_cast_fp16, y = var_760_to_fp16)[name = string("attn_weights_9_cast_fp16")]; tensor x_77_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = causal_mask)[name = string("x_77_cast_fp16")]; tensor reduce_max_2_axes_0 = const()[name = string("reduce_max_2_axes_0"), val = tensor([-1])]; bool reduce_max_2_keep_dims_0 = const()[name = string("reduce_max_2_keep_dims_0"), val = bool(true)]; tensor reduce_max_2_cast_fp16 = reduce_max(axes = reduce_max_2_axes_0, keep_dims = reduce_max_2_keep_dims_0, x = x_77_cast_fp16)[name = string("reduce_max_2_cast_fp16")]; tensor x_79_cast_fp16 = sub(x = x_77_cast_fp16, y = reduce_max_2_cast_fp16)[name = string("x_79_cast_fp16")]; tensor exp_x_5_cast_fp16 = exp(x = x_79_cast_fp16)[name = string("exp_x_5_cast_fp16")]; tensor var_771_axes_0 = const()[name = string("op_771_axes_0"), val = tensor([-1])]; bool var_771_keep_dims_0 = const()[name = string("op_771_keep_dims_0"), val = bool(true)]; tensor var_771_cast_fp16 = reduce_sum(axes = var_771_axes_0, keep_dims = var_771_keep_dims_0, x = exp_x_5_cast_fp16)[name = string("op_771_cast_fp16")]; tensor attn_weights_11_cast_fp16 = real_div(x = exp_x_5_cast_fp16, y = var_771_cast_fp16)[name = string("attn_weights_11_cast_fp16")]; bool attn_output_13_transpose_x_0 = const()[name = string("attn_output_13_transpose_x_0"), val = bool(false)]; bool attn_output_13_transpose_y_0 = const()[name = string("attn_output_13_transpose_y_0"), val = bool(false)]; tensor attn_output_13_cast_fp16 = matmul(transpose_x = attn_output_13_transpose_x_0, transpose_y = attn_output_13_transpose_y_0, x = attn_weights_11_cast_fp16, y = value_states_11_cast_fp16)[name = string("attn_output_13_cast_fp16")]; tensor var_774_perm_0 = const()[name = string("op_774_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_776 = const()[name = string("op_776"), val = tensor([1, 1, 2048])]; tensor var_774_cast_fp16 = transpose(perm = var_774_perm_0, x = attn_output_13_cast_fp16)[name = string("transpose_22")]; tensor input_33_cast_fp16 = reshape(shape = var_776, x = var_774_cast_fp16)[name = string("input_33_cast_fp16")]; tensor model_model_layers_2_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(264977216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267074432))))[name = string("model_model_layers_2_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_2_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_2_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_33_cast_fp16)[name = string("linear_2_cast_fp16")]; tensor hidden_states_21_cast_fp16 = add(x = hidden_states_17_cast_fp16, y = linear_2_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; tensor mean_11_axes_0 = const()[name = string("mean_11_axes_0"), val = tensor([-1])]; bool mean_11_keep_dims_0 = const()[name = string("mean_11_keep_dims_0"), val = bool(true)]; tensor mean_11_cast_fp16 = reduce_mean(axes = mean_11_axes_0, keep_dims = mean_11_keep_dims_0, x = hidden_states_21_cast_fp16)[name = string("mean_11_cast_fp16")]; tensor input_35_cast_fp16 = sub(x = hidden_states_21_cast_fp16, y = mean_11_cast_fp16)[name = string("input_35_cast_fp16")]; tensor var_787_axes_0 = const()[name = string("op_787_axes_0"), val = tensor([-1])]; tensor model_model_layers_2_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_2_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267082688)))]; tensor var_787_cast_fp16 = layer_norm(axes = var_787_axes_0, epsilon = var_44_to_fp16, gamma = model_model_layers_2_post_attention_layernorm_weight_to_fp16, x = input_35_cast_fp16)[name = string("op_787_cast_fp16")]; tensor var_794 = const()[name = string("op_794"), val = tensor([0, 2, 1])]; tensor input_37_axes_0 = const()[name = string("input_37_axes_0"), val = tensor([2])]; tensor var_795 = transpose(perm = var_794, x = var_787_cast_fp16)[name = string("transpose_21")]; tensor input_37 = expand_dims(axes = input_37_axes_0, x = var_795)[name = string("input_37")]; string input_39_pad_type_0 = const()[name = string("input_39_pad_type_0"), val = string("valid")]; tensor input_39_strides_0 = const()[name = string("input_39_strides_0"), val = tensor([1, 1])]; tensor input_39_pad_0 = const()[name = string("input_39_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_39_dilations_0 = const()[name = string("input_39_dilations_0"), val = tensor([1, 1])]; int32 input_39_groups_0 = const()[name = string("input_39_groups_0"), val = int32(1)]; tensor input_39 = conv(dilations = input_39_dilations_0, groups = input_39_groups_0, pad = input_39_pad_0, pad_type = input_39_pad_type_0, strides = input_39_strides_0, weight = model_model_layers_2_mlp_gate_proj_weight_palettized, x = input_37)[name = string("input_39")]; string up_states_5_pad_type_0 = const()[name = string("up_states_5_pad_type_0"), val = string("valid")]; tensor up_states_5_strides_0 = const()[name = string("up_states_5_strides_0"), val = tensor([1, 1])]; tensor up_states_5_pad_0 = const()[name = string("up_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_5_dilations_0 = const()[name = string("up_states_5_dilations_0"), val = tensor([1, 1])]; int32 up_states_5_groups_0 = const()[name = string("up_states_5_groups_0"), val = int32(1)]; tensor up_states_5 = conv(dilations = up_states_5_dilations_0, groups = up_states_5_groups_0, pad = up_states_5_pad_0, pad_type = up_states_5_pad_type_0, strides = up_states_5_strides_0, weight = model_model_layers_2_mlp_up_proj_weight_palettized, x = input_37)[name = string("up_states_5")]; tensor gate_states_5 = silu(x = input_39)[name = string("gate_states_5")]; tensor input_41 = mul(x = gate_states_5, y = up_states_5)[name = string("input_41")]; string hidden_states_23_pad_type_0 = const()[name = string("hidden_states_23_pad_type_0"), val = string("valid")]; tensor hidden_states_23_strides_0 = const()[name = string("hidden_states_23_strides_0"), val = tensor([1, 1])]; tensor hidden_states_23_pad_0 = const()[name = string("hidden_states_23_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_23_dilations_0 = const()[name = string("hidden_states_23_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_23_groups_0 = const()[name = string("hidden_states_23_groups_0"), val = int32(1)]; tensor hidden_states_23 = conv(dilations = hidden_states_23_dilations_0, groups = hidden_states_23_groups_0, pad = hidden_states_23_pad_0, pad_type = hidden_states_23_pad_type_0, strides = hidden_states_23_strides_0, weight = model_model_layers_2_mlp_down_proj_weight_palettized, x = input_41)[name = string("hidden_states_23")]; tensor var_817_axes_0 = const()[name = string("op_817_axes_0"), val = tensor([2])]; tensor var_817 = squeeze(axes = var_817_axes_0, x = hidden_states_23)[name = string("op_817")]; tensor var_818 = const()[name = string("op_818"), val = tensor([0, 2, 1])]; tensor var_819 = transpose(perm = var_818, x = var_817)[name = string("transpose_20")]; tensor hidden_states_25_cast_fp16 = add(x = hidden_states_21_cast_fp16, y = var_819)[name = string("hidden_states_25_cast_fp16")]; tensor mean_13_axes_0 = const()[name = string("mean_13_axes_0"), val = tensor([-1])]; bool mean_13_keep_dims_0 = const()[name = string("mean_13_keep_dims_0"), val = bool(true)]; tensor mean_13_cast_fp16 = reduce_mean(axes = mean_13_axes_0, keep_dims = mean_13_keep_dims_0, x = hidden_states_25_cast_fp16)[name = string("mean_13_cast_fp16")]; tensor input_43_cast_fp16 = sub(x = hidden_states_25_cast_fp16, y = mean_13_cast_fp16)[name = string("input_43_cast_fp16")]; tensor var_827_axes_0 = const()[name = string("op_827_axes_0"), val = tensor([-1])]; tensor model_model_layers_3_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_3_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267086848)))]; tensor var_827_cast_fp16 = layer_norm(axes = var_827_axes_0, epsilon = var_44_to_fp16, gamma = model_model_layers_3_input_layernorm_weight_to_fp16, x = input_43_cast_fp16)[name = string("op_827_cast_fp16")]; tensor var_830 = const()[name = string("op_830"), val = tensor([0, 2, 1])]; tensor var_832_axes_0 = const()[name = string("op_832_axes_0"), val = tensor([2])]; tensor var_831 = transpose(perm = var_830, x = var_827_cast_fp16)[name = string("transpose_19")]; tensor var_832 = expand_dims(axes = var_832_axes_0, x = var_831)[name = string("op_832")]; string var_839_pad_type_0 = const()[name = string("op_839_pad_type_0"), val = string("valid")]; tensor var_839_strides_0 = const()[name = string("op_839_strides_0"), val = tensor([1, 1])]; tensor var_839_pad_0 = const()[name = string("op_839_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_839_dilations_0 = const()[name = string("op_839_dilations_0"), val = tensor([1, 1])]; int32 var_839_groups_0 = const()[name = string("op_839_groups_0"), val = int32(1)]; tensor var_839 = conv(dilations = var_839_dilations_0, groups = var_839_groups_0, pad = var_839_pad_0, pad_type = var_839_pad_type_0, strides = var_839_strides_0, weight = model_model_layers_3_self_attn_q_proj_weight_palettized, x = var_832)[name = string("op_839")]; tensor var_840 = const()[name = string("op_840"), val = tensor([1, 32, 1, 64])]; tensor var_841 = reshape(shape = var_840, x = var_839)[name = string("op_841")]; string var_848_pad_type_0 = const()[name = string("op_848_pad_type_0"), val = string("valid")]; tensor var_848_strides_0 = const()[name = string("op_848_strides_0"), val = tensor([1, 1])]; tensor var_848_pad_0 = const()[name = string("op_848_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_848_dilations_0 = const()[name = string("op_848_dilations_0"), val = tensor([1, 1])]; int32 var_848_groups_0 = const()[name = string("op_848_groups_0"), val = int32(1)]; tensor var_848 = conv(dilations = var_848_dilations_0, groups = var_848_groups_0, pad = var_848_pad_0, pad_type = var_848_pad_type_0, strides = var_848_strides_0, weight = model_model_layers_3_self_attn_k_proj_weight_palettized, x = var_832)[name = string("op_848")]; tensor var_849 = const()[name = string("op_849"), val = tensor([1, 8, 1, 64])]; tensor var_850 = reshape(shape = var_849, x = var_848)[name = string("op_850")]; string var_857_pad_type_0 = const()[name = string("op_857_pad_type_0"), val = string("valid")]; tensor var_857_strides_0 = const()[name = string("op_857_strides_0"), val = tensor([1, 1])]; tensor var_857_pad_0 = const()[name = string("op_857_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_857_dilations_0 = const()[name = string("op_857_dilations_0"), val = tensor([1, 1])]; int32 var_857_groups_0 = const()[name = string("op_857_groups_0"), val = int32(1)]; tensor var_857 = conv(dilations = var_857_dilations_0, groups = var_857_groups_0, pad = var_857_pad_0, pad_type = var_857_pad_type_0, strides = var_857_strides_0, weight = model_model_layers_3_self_attn_v_proj_weight_palettized, x = var_832)[name = string("op_857")]; tensor var_858 = const()[name = string("op_858"), val = tensor([1, 8, 1, 64])]; tensor var_859 = reshape(shape = var_858, x = var_857)[name = string("op_859")]; tensor x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor([1, 32, 1, 32])]; tensor x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_13 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = var_841)[name = string("x1_13")]; tensor x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor([1, 32, 1, 64])]; tensor x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_13 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = var_841)[name = string("x2_13")]; tensor var_873_cast_fp16 = mul(x = x1_13, y = cos_3_cast_fp16)[name = string("op_873_cast_fp16")]; tensor var_874_cast_fp16 = mul(x = x2_13, y = sin_3_cast_fp16)[name = string("op_874_cast_fp16")]; tensor var_875_cast_fp16 = sub(x = var_873_cast_fp16, y = var_874_cast_fp16)[name = string("op_875_cast_fp16")]; tensor var_876_cast_fp16 = mul(x = x2_13, y = cos_3_cast_fp16)[name = string("op_876_cast_fp16")]; tensor var_877_cast_fp16 = mul(x = x1_13, y = sin_3_cast_fp16)[name = string("op_877_cast_fp16")]; tensor var_878_cast_fp16 = add(x = var_876_cast_fp16, y = var_877_cast_fp16)[name = string("op_878_cast_fp16")]; bool rotated_13_interleave_0 = const()[name = string("rotated_13_interleave_0"), val = bool(false)]; tensor rotated_13_cast_fp16 = concat(axis = var_49, interleave = rotated_13_interleave_0, values = (var_875_cast_fp16, var_878_cast_fp16))[name = string("rotated_13_cast_fp16")]; tensor x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor([1, 8, 1, 32])]; tensor x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_15 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = var_850)[name = string("x1_15")]; tensor x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor([1, 8, 1, 64])]; tensor x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_15 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = var_850)[name = string("x2_15")]; tensor var_894_cast_fp16 = mul(x = x1_15, y = cos_3_cast_fp16)[name = string("op_894_cast_fp16")]; tensor var_895_cast_fp16 = mul(x = x2_15, y = sin_3_cast_fp16)[name = string("op_895_cast_fp16")]; tensor var_896_cast_fp16 = sub(x = var_894_cast_fp16, y = var_895_cast_fp16)[name = string("op_896_cast_fp16")]; tensor var_897_cast_fp16 = mul(x = x2_15, y = cos_3_cast_fp16)[name = string("op_897_cast_fp16")]; tensor var_898_cast_fp16 = mul(x = x1_15, y = sin_3_cast_fp16)[name = string("op_898_cast_fp16")]; tensor var_899_cast_fp16 = add(x = var_897_cast_fp16, y = var_898_cast_fp16)[name = string("op_899_cast_fp16")]; bool rotated_15_interleave_0 = const()[name = string("rotated_15_interleave_0"), val = bool(false)]; tensor rotated_15_cast_fp16 = concat(axis = var_49, interleave = rotated_15_interleave_0, values = (var_896_cast_fp16, var_899_cast_fp16))[name = string("rotated_15_cast_fp16")]; tensor expand_dims_36 = const()[name = string("expand_dims_36"), val = tensor([3])]; tensor expand_dims_37 = const()[name = string("expand_dims_37"), val = tensor([0])]; tensor expand_dims_39 = const()[name = string("expand_dims_39"), val = tensor([0])]; tensor expand_dims_40 = const()[name = string("expand_dims_40"), val = tensor([4])]; int32 concat_26_axis_0 = const()[name = string("concat_26_axis_0"), val = int32(0)]; bool concat_26_interleave_0 = const()[name = string("concat_26_interleave_0"), val = bool(false)]; tensor concat_26 = concat(axis = concat_26_axis_0, interleave = concat_26_interleave_0, values = (expand_dims_36, expand_dims_37, current_pos, expand_dims_39))[name = string("concat_26")]; tensor concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor([0])]; tensor concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor([0])]; int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)]; bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)]; tensor concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (expand_dims_40, concat_27_values1_0, var_349, concat_27_values3_0))[name = string("concat_27")]; tensor model_model_kv_cache_0_internal_tensor_assign_7_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_26, begin_mask = model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0, end = concat_27, end_mask = model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_7_stride_0, update = rotated_15_cast_fp16, x = coreml_update_state_21)[name = string("model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_6_write_state")]; tensor coreml_update_state_22 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_6")]; tensor expand_dims_42 = const()[name = string("expand_dims_42"), val = tensor([19])]; tensor expand_dims_43 = const()[name = string("expand_dims_43"), val = tensor([0])]; tensor expand_dims_45 = const()[name = string("expand_dims_45"), val = tensor([0])]; tensor expand_dims_46 = const()[name = string("expand_dims_46"), val = tensor([20])]; int32 concat_30_axis_0 = const()[name = string("concat_30_axis_0"), val = int32(0)]; bool concat_30_interleave_0 = const()[name = string("concat_30_interleave_0"), val = bool(false)]; tensor concat_30 = concat(axis = concat_30_axis_0, interleave = concat_30_interleave_0, values = (expand_dims_42, expand_dims_43, current_pos, expand_dims_45))[name = string("concat_30")]; tensor concat_31_values1_0 = const()[name = string("concat_31_values1_0"), val = tensor([0])]; tensor concat_31_values3_0 = const()[name = string("concat_31_values3_0"), val = tensor([0])]; int32 concat_31_axis_0 = const()[name = string("concat_31_axis_0"), val = int32(0)]; bool concat_31_interleave_0 = const()[name = string("concat_31_interleave_0"), val = bool(false)]; tensor concat_31 = concat(axis = concat_31_axis_0, interleave = concat_31_interleave_0, values = (expand_dims_46, concat_31_values1_0, var_349, concat_31_values3_0))[name = string("concat_31")]; tensor model_model_kv_cache_0_internal_tensor_assign_8_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_30, begin_mask = model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0, end = concat_31, end_mask = model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_8_stride_0, update = var_859, x = coreml_update_state_22)[name = string("model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_7_write_state")]; tensor coreml_update_state_23 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_7")]; tensor var_919_begin_0 = const()[name = string("op_919_begin_0"), val = tensor([3, 0, 0, 0])]; tensor var_919_end_0 = const()[name = string("op_919_end_0"), val = tensor([4, 8, 2048, 64])]; tensor var_919_end_mask_0 = const()[name = string("op_919_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_919_cast_fp16 = slice_by_index(begin = var_919_begin_0, end = var_919_end_0, end_mask = var_919_end_mask_0, x = coreml_update_state_23)[name = string("op_919_cast_fp16")]; tensor K_layer_cache_7_axes_0 = const()[name = string("K_layer_cache_7_axes_0"), val = tensor([0])]; tensor K_layer_cache_7_cast_fp16 = squeeze(axes = K_layer_cache_7_axes_0, x = var_919_cast_fp16)[name = string("K_layer_cache_7_cast_fp16")]; tensor var_921_begin_0 = const()[name = string("op_921_begin_0"), val = tensor([19, 0, 0, 0])]; tensor var_921_end_0 = const()[name = string("op_921_end_0"), val = tensor([20, 8, 2048, 64])]; tensor var_921_end_mask_0 = const()[name = string("op_921_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_921_cast_fp16 = slice_by_index(begin = var_921_begin_0, end = var_921_end_0, end_mask = var_921_end_mask_0, x = coreml_update_state_23)[name = string("op_921_cast_fp16")]; tensor V_layer_cache_7_axes_0 = const()[name = string("V_layer_cache_7_axes_0"), val = tensor([0])]; tensor V_layer_cache_7_cast_fp16 = squeeze(axes = V_layer_cache_7_axes_0, x = var_921_cast_fp16)[name = string("V_layer_cache_7_cast_fp16")]; tensor x_95_axes_0 = const()[name = string("x_95_axes_0"), val = tensor([1])]; tensor x_95_cast_fp16 = expand_dims(axes = x_95_axes_0, x = K_layer_cache_7_cast_fp16)[name = string("x_95_cast_fp16")]; tensor var_930 = const()[name = string("op_930"), val = tensor([1, 4, 1, 1])]; tensor x_97_cast_fp16 = tile(reps = var_930, x = x_95_cast_fp16)[name = string("x_97_cast_fp16")]; tensor var_934 = const()[name = string("op_934"), val = tensor([1, -1, 2048, 64])]; tensor key_states_15_cast_fp16 = reshape(shape = var_934, x = x_97_cast_fp16)[name = string("key_states_15_cast_fp16")]; tensor x_101_axes_0 = const()[name = string("x_101_axes_0"), val = tensor([1])]; tensor x_101_cast_fp16 = expand_dims(axes = x_101_axes_0, x = V_layer_cache_7_cast_fp16)[name = string("x_101_cast_fp16")]; tensor var_937 = const()[name = string("op_937"), val = tensor([1, 4, 1, 1])]; tensor x_103_cast_fp16 = tile(reps = var_937, x = x_101_cast_fp16)[name = string("x_103_cast_fp16")]; tensor var_941 = const()[name = string("op_941"), val = tensor([1, -1, 2048, 64])]; tensor value_states_15_cast_fp16 = reshape(shape = var_941, x = x_103_cast_fp16)[name = string("value_states_15_cast_fp16")]; bool var_944_transpose_x_1 = const()[name = string("op_944_transpose_x_1"), val = bool(false)]; bool var_944_transpose_y_1 = const()[name = string("op_944_transpose_y_1"), val = bool(true)]; tensor var_944_cast_fp16 = matmul(transpose_x = var_944_transpose_x_1, transpose_y = var_944_transpose_y_1, x = rotated_13_cast_fp16, y = key_states_15_cast_fp16)[name = string("op_944_cast_fp16")]; fp16 var_945_to_fp16 = const()[name = string("op_945_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_13_cast_fp16 = mul(x = var_944_cast_fp16, y = var_945_to_fp16)[name = string("attn_weights_13_cast_fp16")]; tensor x_105_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = causal_mask)[name = string("x_105_cast_fp16")]; tensor reduce_max_3_axes_0 = const()[name = string("reduce_max_3_axes_0"), val = tensor([-1])]; bool reduce_max_3_keep_dims_0 = const()[name = string("reduce_max_3_keep_dims_0"), val = bool(true)]; tensor reduce_max_3_cast_fp16 = reduce_max(axes = reduce_max_3_axes_0, keep_dims = reduce_max_3_keep_dims_0, x = x_105_cast_fp16)[name = string("reduce_max_3_cast_fp16")]; tensor x_107_cast_fp16 = sub(x = x_105_cast_fp16, y = reduce_max_3_cast_fp16)[name = string("x_107_cast_fp16")]; tensor exp_x_7_cast_fp16 = exp(x = x_107_cast_fp16)[name = string("exp_x_7_cast_fp16")]; tensor var_956_axes_0 = const()[name = string("op_956_axes_0"), val = tensor([-1])]; bool var_956_keep_dims_0 = const()[name = string("op_956_keep_dims_0"), val = bool(true)]; tensor var_956_cast_fp16 = reduce_sum(axes = var_956_axes_0, keep_dims = var_956_keep_dims_0, x = exp_x_7_cast_fp16)[name = string("op_956_cast_fp16")]; tensor attn_weights_15_cast_fp16 = real_div(x = exp_x_7_cast_fp16, y = var_956_cast_fp16)[name = string("attn_weights_15_cast_fp16")]; bool attn_output_19_transpose_x_0 = const()[name = string("attn_output_19_transpose_x_0"), val = bool(false)]; bool attn_output_19_transpose_y_0 = const()[name = string("attn_output_19_transpose_y_0"), val = bool(false)]; tensor attn_output_19_cast_fp16 = matmul(transpose_x = attn_output_19_transpose_x_0, transpose_y = attn_output_19_transpose_y_0, x = attn_weights_15_cast_fp16, y = value_states_15_cast_fp16)[name = string("attn_output_19_cast_fp16")]; tensor var_959_perm_0 = const()[name = string("op_959_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_961 = const()[name = string("op_961"), val = tensor([1, 1, 2048])]; tensor var_959_cast_fp16 = transpose(perm = var_959_perm_0, x = attn_output_19_cast_fp16)[name = string("transpose_18")]; tensor input_47_cast_fp16 = reshape(shape = var_961, x = var_959_cast_fp16)[name = string("input_47_cast_fp16")]; tensor model_model_layers_3_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267091008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269188224))))[name = string("model_model_layers_3_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_3_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_3_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_47_cast_fp16)[name = string("linear_3_cast_fp16")]; tensor hidden_states_29_cast_fp16 = add(x = hidden_states_25_cast_fp16, y = linear_3_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; tensor mean_15_axes_0 = const()[name = string("mean_15_axes_0"), val = tensor([-1])]; bool mean_15_keep_dims_0 = const()[name = string("mean_15_keep_dims_0"), val = bool(true)]; tensor mean_15_cast_fp16 = reduce_mean(axes = mean_15_axes_0, keep_dims = mean_15_keep_dims_0, x = hidden_states_29_cast_fp16)[name = string("mean_15_cast_fp16")]; tensor input_49_cast_fp16 = sub(x = hidden_states_29_cast_fp16, y = mean_15_cast_fp16)[name = string("input_49_cast_fp16")]; tensor var_972_axes_0 = const()[name = string("op_972_axes_0"), val = tensor([-1])]; tensor model_model_layers_3_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_3_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269196480)))]; tensor var_972_cast_fp16 = layer_norm(axes = var_972_axes_0, epsilon = var_44_to_fp16, gamma = model_model_layers_3_post_attention_layernorm_weight_to_fp16, x = input_49_cast_fp16)[name = string("op_972_cast_fp16")]; tensor var_979 = const()[name = string("op_979"), val = tensor([0, 2, 1])]; tensor input_51_axes_0 = const()[name = string("input_51_axes_0"), val = tensor([2])]; tensor var_980 = transpose(perm = var_979, x = var_972_cast_fp16)[name = string("transpose_17")]; tensor input_51 = expand_dims(axes = input_51_axes_0, x = var_980)[name = string("input_51")]; string input_53_pad_type_0 = const()[name = string("input_53_pad_type_0"), val = string("valid")]; tensor input_53_strides_0 = const()[name = string("input_53_strides_0"), val = tensor([1, 1])]; tensor input_53_pad_0 = const()[name = string("input_53_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_53_dilations_0 = const()[name = string("input_53_dilations_0"), val = tensor([1, 1])]; int32 input_53_groups_0 = const()[name = string("input_53_groups_0"), val = int32(1)]; tensor input_53 = conv(dilations = input_53_dilations_0, groups = input_53_groups_0, pad = input_53_pad_0, pad_type = input_53_pad_type_0, strides = input_53_strides_0, weight = model_model_layers_3_mlp_gate_proj_weight_palettized, x = input_51)[name = string("input_53")]; string up_states_7_pad_type_0 = const()[name = string("up_states_7_pad_type_0"), val = string("valid")]; tensor up_states_7_strides_0 = const()[name = string("up_states_7_strides_0"), val = tensor([1, 1])]; tensor up_states_7_pad_0 = const()[name = string("up_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_7_dilations_0 = const()[name = string("up_states_7_dilations_0"), val = tensor([1, 1])]; int32 up_states_7_groups_0 = const()[name = string("up_states_7_groups_0"), val = int32(1)]; tensor up_states_7 = conv(dilations = up_states_7_dilations_0, groups = up_states_7_groups_0, pad = up_states_7_pad_0, pad_type = up_states_7_pad_type_0, strides = up_states_7_strides_0, weight = model_model_layers_3_mlp_up_proj_weight_palettized, x = input_51)[name = string("up_states_7")]; tensor gate_states_7 = silu(x = input_53)[name = string("gate_states_7")]; tensor input_55 = mul(x = gate_states_7, y = up_states_7)[name = string("input_55")]; string hidden_states_31_pad_type_0 = const()[name = string("hidden_states_31_pad_type_0"), val = string("valid")]; tensor hidden_states_31_strides_0 = const()[name = string("hidden_states_31_strides_0"), val = tensor([1, 1])]; tensor hidden_states_31_pad_0 = const()[name = string("hidden_states_31_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_31_dilations_0 = const()[name = string("hidden_states_31_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_31_groups_0 = const()[name = string("hidden_states_31_groups_0"), val = int32(1)]; tensor hidden_states_31 = conv(dilations = hidden_states_31_dilations_0, groups = hidden_states_31_groups_0, pad = hidden_states_31_pad_0, pad_type = hidden_states_31_pad_type_0, strides = hidden_states_31_strides_0, weight = model_model_layers_3_mlp_down_proj_weight_palettized, x = input_55)[name = string("hidden_states_31")]; tensor var_1002_axes_0 = const()[name = string("op_1002_axes_0"), val = tensor([2])]; tensor var_1002 = squeeze(axes = var_1002_axes_0, x = hidden_states_31)[name = string("op_1002")]; tensor var_1003 = const()[name = string("op_1003"), val = tensor([0, 2, 1])]; tensor var_1004 = transpose(perm = var_1003, x = var_1002)[name = string("transpose_16")]; tensor hidden_states_33_cast_fp16 = add(x = hidden_states_29_cast_fp16, y = var_1004)[name = string("hidden_states_33_cast_fp16")]; tensor mean_17_axes_0 = const()[name = string("mean_17_axes_0"), val = tensor([-1])]; bool mean_17_keep_dims_0 = const()[name = string("mean_17_keep_dims_0"), val = bool(true)]; tensor mean_17_cast_fp16 = reduce_mean(axes = mean_17_axes_0, keep_dims = mean_17_keep_dims_0, x = hidden_states_33_cast_fp16)[name = string("mean_17_cast_fp16")]; tensor input_57_cast_fp16 = sub(x = hidden_states_33_cast_fp16, y = mean_17_cast_fp16)[name = string("input_57_cast_fp16")]; tensor var_1012_axes_0 = const()[name = string("op_1012_axes_0"), val = tensor([-1])]; tensor model_model_layers_4_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_4_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269200640)))]; tensor var_1012_cast_fp16 = layer_norm(axes = var_1012_axes_0, epsilon = var_44_to_fp16, gamma = model_model_layers_4_input_layernorm_weight_to_fp16, x = input_57_cast_fp16)[name = string("op_1012_cast_fp16")]; tensor var_1015 = const()[name = string("op_1015"), val = tensor([0, 2, 1])]; tensor var_1017_axes_0 = const()[name = string("op_1017_axes_0"), val = tensor([2])]; tensor var_1016 = transpose(perm = var_1015, x = var_1012_cast_fp16)[name = string("transpose_15")]; tensor var_1017 = expand_dims(axes = var_1017_axes_0, x = var_1016)[name = string("op_1017")]; string var_1024_pad_type_0 = const()[name = string("op_1024_pad_type_0"), val = string("valid")]; tensor var_1024_strides_0 = const()[name = string("op_1024_strides_0"), val = tensor([1, 1])]; tensor var_1024_pad_0 = const()[name = string("op_1024_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1024_dilations_0 = const()[name = string("op_1024_dilations_0"), val = tensor([1, 1])]; int32 var_1024_groups_0 = const()[name = string("op_1024_groups_0"), val = int32(1)]; tensor var_1024 = conv(dilations = var_1024_dilations_0, groups = var_1024_groups_0, pad = var_1024_pad_0, pad_type = var_1024_pad_type_0, strides = var_1024_strides_0, weight = model_model_layers_4_self_attn_q_proj_weight_palettized, x = var_1017)[name = string("op_1024")]; tensor var_1025 = const()[name = string("op_1025"), val = tensor([1, 32, 1, 64])]; tensor var_1026 = reshape(shape = var_1025, x = var_1024)[name = string("op_1026")]; string var_1033_pad_type_0 = const()[name = string("op_1033_pad_type_0"), val = string("valid")]; tensor var_1033_strides_0 = const()[name = string("op_1033_strides_0"), val = tensor([1, 1])]; tensor var_1033_pad_0 = const()[name = string("op_1033_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1033_dilations_0 = const()[name = string("op_1033_dilations_0"), val = tensor([1, 1])]; int32 var_1033_groups_0 = const()[name = string("op_1033_groups_0"), val = int32(1)]; tensor var_1033 = conv(dilations = var_1033_dilations_0, groups = var_1033_groups_0, pad = var_1033_pad_0, pad_type = var_1033_pad_type_0, strides = var_1033_strides_0, weight = model_model_layers_4_self_attn_k_proj_weight_palettized, x = var_1017)[name = string("op_1033")]; tensor var_1034 = const()[name = string("op_1034"), val = tensor([1, 8, 1, 64])]; tensor var_1035 = reshape(shape = var_1034, x = var_1033)[name = string("op_1035")]; string var_1042_pad_type_0 = const()[name = string("op_1042_pad_type_0"), val = string("valid")]; tensor var_1042_strides_0 = const()[name = string("op_1042_strides_0"), val = tensor([1, 1])]; tensor var_1042_pad_0 = const()[name = string("op_1042_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1042_dilations_0 = const()[name = string("op_1042_dilations_0"), val = tensor([1, 1])]; int32 var_1042_groups_0 = const()[name = string("op_1042_groups_0"), val = int32(1)]; tensor var_1042 = conv(dilations = var_1042_dilations_0, groups = var_1042_groups_0, pad = var_1042_pad_0, pad_type = var_1042_pad_type_0, strides = var_1042_strides_0, weight = model_model_layers_4_self_attn_v_proj_weight_palettized, x = var_1017)[name = string("op_1042")]; tensor var_1043 = const()[name = string("op_1043"), val = tensor([1, 8, 1, 64])]; tensor var_1044 = reshape(shape = var_1043, x = var_1042)[name = string("op_1044")]; tensor x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor([1, 32, 1, 32])]; tensor x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_17 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = var_1026)[name = string("x1_17")]; tensor x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor([1, 32, 1, 64])]; tensor x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_17 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = var_1026)[name = string("x2_17")]; tensor var_1058_cast_fp16 = mul(x = x1_17, y = cos_3_cast_fp16)[name = string("op_1058_cast_fp16")]; tensor var_1059_cast_fp16 = mul(x = x2_17, y = sin_3_cast_fp16)[name = string("op_1059_cast_fp16")]; tensor var_1060_cast_fp16 = sub(x = var_1058_cast_fp16, y = var_1059_cast_fp16)[name = string("op_1060_cast_fp16")]; tensor var_1061_cast_fp16 = mul(x = x2_17, y = cos_3_cast_fp16)[name = string("op_1061_cast_fp16")]; tensor var_1062_cast_fp16 = mul(x = x1_17, y = sin_3_cast_fp16)[name = string("op_1062_cast_fp16")]; tensor var_1063_cast_fp16 = add(x = var_1061_cast_fp16, y = var_1062_cast_fp16)[name = string("op_1063_cast_fp16")]; bool rotated_17_interleave_0 = const()[name = string("rotated_17_interleave_0"), val = bool(false)]; tensor rotated_17_cast_fp16 = concat(axis = var_49, interleave = rotated_17_interleave_0, values = (var_1060_cast_fp16, var_1063_cast_fp16))[name = string("rotated_17_cast_fp16")]; tensor x1_19_begin_0 = const()[name = string("x1_19_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_19_end_0 = const()[name = string("x1_19_end_0"), val = tensor([1, 8, 1, 32])]; tensor x1_19_end_mask_0 = const()[name = string("x1_19_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_19 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = var_1035)[name = string("x1_19")]; tensor x2_19_begin_0 = const()[name = string("x2_19_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_19_end_0 = const()[name = string("x2_19_end_0"), val = tensor([1, 8, 1, 64])]; tensor x2_19_end_mask_0 = const()[name = string("x2_19_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_19 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = var_1035)[name = string("x2_19")]; tensor var_1079_cast_fp16 = mul(x = x1_19, y = cos_3_cast_fp16)[name = string("op_1079_cast_fp16")]; tensor var_1080_cast_fp16 = mul(x = x2_19, y = sin_3_cast_fp16)[name = string("op_1080_cast_fp16")]; tensor var_1081_cast_fp16 = sub(x = var_1079_cast_fp16, y = var_1080_cast_fp16)[name = string("op_1081_cast_fp16")]; tensor var_1082_cast_fp16 = mul(x = x2_19, y = cos_3_cast_fp16)[name = string("op_1082_cast_fp16")]; tensor var_1083_cast_fp16 = mul(x = x1_19, y = sin_3_cast_fp16)[name = string("op_1083_cast_fp16")]; tensor var_1084_cast_fp16 = add(x = var_1082_cast_fp16, y = var_1083_cast_fp16)[name = string("op_1084_cast_fp16")]; bool rotated_19_interleave_0 = const()[name = string("rotated_19_interleave_0"), val = bool(false)]; tensor rotated_19_cast_fp16 = concat(axis = var_49, interleave = rotated_19_interleave_0, values = (var_1081_cast_fp16, var_1084_cast_fp16))[name = string("rotated_19_cast_fp16")]; tensor expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor([4])]; tensor expand_dims_49 = const()[name = string("expand_dims_49"), val = tensor([0])]; tensor expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor([0])]; tensor expand_dims_52 = const()[name = string("expand_dims_52"), val = tensor([5])]; int32 concat_34_axis_0 = const()[name = string("concat_34_axis_0"), val = int32(0)]; bool concat_34_interleave_0 = const()[name = string("concat_34_interleave_0"), val = bool(false)]; tensor concat_34 = concat(axis = concat_34_axis_0, interleave = concat_34_interleave_0, values = (expand_dims_48, expand_dims_49, current_pos, expand_dims_51))[name = string("concat_34")]; tensor concat_35_values1_0 = const()[name = string("concat_35_values1_0"), val = tensor([0])]; tensor concat_35_values3_0 = const()[name = string("concat_35_values3_0"), val = tensor([0])]; int32 concat_35_axis_0 = const()[name = string("concat_35_axis_0"), val = int32(0)]; bool concat_35_interleave_0 = const()[name = string("concat_35_interleave_0"), val = bool(false)]; tensor concat_35 = concat(axis = concat_35_axis_0, interleave = concat_35_interleave_0, values = (expand_dims_52, concat_35_values1_0, var_349, concat_35_values3_0))[name = string("concat_35")]; tensor model_model_kv_cache_0_internal_tensor_assign_9_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_34, begin_mask = model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0, end = concat_35, end_mask = model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_9_stride_0, update = rotated_19_cast_fp16, x = coreml_update_state_23)[name = string("model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_8_write_state")]; tensor coreml_update_state_24 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_8")]; tensor expand_dims_54 = const()[name = string("expand_dims_54"), val = tensor([20])]; tensor expand_dims_55 = const()[name = string("expand_dims_55"), val = tensor([0])]; tensor expand_dims_57 = const()[name = string("expand_dims_57"), val = tensor([0])]; tensor expand_dims_58 = const()[name = string("expand_dims_58"), val = tensor([21])]; int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)]; bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)]; tensor concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (expand_dims_54, expand_dims_55, current_pos, expand_dims_57))[name = string("concat_38")]; tensor concat_39_values1_0 = const()[name = string("concat_39_values1_0"), val = tensor([0])]; tensor concat_39_values3_0 = const()[name = string("concat_39_values3_0"), val = tensor([0])]; int32 concat_39_axis_0 = const()[name = string("concat_39_axis_0"), val = int32(0)]; bool concat_39_interleave_0 = const()[name = string("concat_39_interleave_0"), val = bool(false)]; tensor concat_39 = concat(axis = concat_39_axis_0, interleave = concat_39_interleave_0, values = (expand_dims_58, concat_39_values1_0, var_349, concat_39_values3_0))[name = string("concat_39")]; tensor model_model_kv_cache_0_internal_tensor_assign_10_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_38, begin_mask = model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0, end = concat_39, end_mask = model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_10_stride_0, update = var_1044, x = coreml_update_state_24)[name = string("model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_9_write_state")]; tensor coreml_update_state_25 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_9")]; tensor var_1104_begin_0 = const()[name = string("op_1104_begin_0"), val = tensor([4, 0, 0, 0])]; tensor var_1104_end_0 = const()[name = string("op_1104_end_0"), val = tensor([5, 8, 2048, 64])]; tensor var_1104_end_mask_0 = const()[name = string("op_1104_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1104_cast_fp16 = slice_by_index(begin = var_1104_begin_0, end = var_1104_end_0, end_mask = var_1104_end_mask_0, x = coreml_update_state_25)[name = string("op_1104_cast_fp16")]; tensor K_layer_cache_9_axes_0 = const()[name = string("K_layer_cache_9_axes_0"), val = tensor([0])]; tensor K_layer_cache_9_cast_fp16 = squeeze(axes = K_layer_cache_9_axes_0, x = var_1104_cast_fp16)[name = string("K_layer_cache_9_cast_fp16")]; tensor var_1106_begin_0 = const()[name = string("op_1106_begin_0"), val = tensor([20, 0, 0, 0])]; tensor var_1106_end_0 = const()[name = string("op_1106_end_0"), val = tensor([21, 8, 2048, 64])]; tensor var_1106_end_mask_0 = const()[name = string("op_1106_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1106_cast_fp16 = slice_by_index(begin = var_1106_begin_0, end = var_1106_end_0, end_mask = var_1106_end_mask_0, x = coreml_update_state_25)[name = string("op_1106_cast_fp16")]; tensor V_layer_cache_9_axes_0 = const()[name = string("V_layer_cache_9_axes_0"), val = tensor([0])]; tensor V_layer_cache_9_cast_fp16 = squeeze(axes = V_layer_cache_9_axes_0, x = var_1106_cast_fp16)[name = string("V_layer_cache_9_cast_fp16")]; tensor x_123_axes_0 = const()[name = string("x_123_axes_0"), val = tensor([1])]; tensor x_123_cast_fp16 = expand_dims(axes = x_123_axes_0, x = K_layer_cache_9_cast_fp16)[name = string("x_123_cast_fp16")]; tensor var_1115 = const()[name = string("op_1115"), val = tensor([1, 4, 1, 1])]; tensor x_125_cast_fp16 = tile(reps = var_1115, x = x_123_cast_fp16)[name = string("x_125_cast_fp16")]; tensor var_1119 = const()[name = string("op_1119"), val = tensor([1, -1, 2048, 64])]; tensor key_states_19_cast_fp16 = reshape(shape = var_1119, x = x_125_cast_fp16)[name = string("key_states_19_cast_fp16")]; tensor x_129_axes_0 = const()[name = string("x_129_axes_0"), val = tensor([1])]; tensor x_129_cast_fp16 = expand_dims(axes = x_129_axes_0, x = V_layer_cache_9_cast_fp16)[name = string("x_129_cast_fp16")]; tensor var_1122 = const()[name = string("op_1122"), val = tensor([1, 4, 1, 1])]; tensor x_131_cast_fp16 = tile(reps = var_1122, x = x_129_cast_fp16)[name = string("x_131_cast_fp16")]; tensor var_1126 = const()[name = string("op_1126"), val = tensor([1, -1, 2048, 64])]; tensor value_states_19_cast_fp16 = reshape(shape = var_1126, x = x_131_cast_fp16)[name = string("value_states_19_cast_fp16")]; bool var_1129_transpose_x_1 = const()[name = string("op_1129_transpose_x_1"), val = bool(false)]; bool var_1129_transpose_y_1 = const()[name = string("op_1129_transpose_y_1"), val = bool(true)]; tensor var_1129_cast_fp16 = matmul(transpose_x = var_1129_transpose_x_1, transpose_y = var_1129_transpose_y_1, x = rotated_17_cast_fp16, y = key_states_19_cast_fp16)[name = string("op_1129_cast_fp16")]; fp16 var_1130_to_fp16 = const()[name = string("op_1130_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_17_cast_fp16 = mul(x = var_1129_cast_fp16, y = var_1130_to_fp16)[name = string("attn_weights_17_cast_fp16")]; tensor x_133_cast_fp16 = add(x = attn_weights_17_cast_fp16, y = causal_mask)[name = string("x_133_cast_fp16")]; tensor reduce_max_4_axes_0 = const()[name = string("reduce_max_4_axes_0"), val = tensor([-1])]; bool reduce_max_4_keep_dims_0 = const()[name = string("reduce_max_4_keep_dims_0"), val = bool(true)]; tensor reduce_max_4_cast_fp16 = reduce_max(axes = reduce_max_4_axes_0, keep_dims = reduce_max_4_keep_dims_0, x = x_133_cast_fp16)[name = string("reduce_max_4_cast_fp16")]; tensor x_135_cast_fp16 = sub(x = x_133_cast_fp16, y = reduce_max_4_cast_fp16)[name = string("x_135_cast_fp16")]; tensor exp_x_9_cast_fp16 = exp(x = x_135_cast_fp16)[name = string("exp_x_9_cast_fp16")]; tensor var_1141_axes_0 = const()[name = string("op_1141_axes_0"), val = tensor([-1])]; bool var_1141_keep_dims_0 = const()[name = string("op_1141_keep_dims_0"), val = bool(true)]; tensor var_1141_cast_fp16 = reduce_sum(axes = var_1141_axes_0, keep_dims = var_1141_keep_dims_0, x = exp_x_9_cast_fp16)[name = string("op_1141_cast_fp16")]; tensor attn_weights_19_cast_fp16 = real_div(x = exp_x_9_cast_fp16, y = var_1141_cast_fp16)[name = string("attn_weights_19_cast_fp16")]; bool attn_output_25_transpose_x_0 = const()[name = string("attn_output_25_transpose_x_0"), val = bool(false)]; bool attn_output_25_transpose_y_0 = const()[name = string("attn_output_25_transpose_y_0"), val = bool(false)]; tensor attn_output_25_cast_fp16 = matmul(transpose_x = attn_output_25_transpose_x_0, transpose_y = attn_output_25_transpose_y_0, x = attn_weights_19_cast_fp16, y = value_states_19_cast_fp16)[name = string("attn_output_25_cast_fp16")]; tensor var_1144_perm_0 = const()[name = string("op_1144_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1146 = const()[name = string("op_1146"), val = tensor([1, 1, 2048])]; tensor var_1144_cast_fp16 = transpose(perm = var_1144_perm_0, x = attn_output_25_cast_fp16)[name = string("transpose_14")]; tensor input_61_cast_fp16 = reshape(shape = var_1146, x = var_1144_cast_fp16)[name = string("input_61_cast_fp16")]; tensor model_model_layers_4_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269204800))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271302016))))[name = string("model_model_layers_4_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_4_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_4_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_61_cast_fp16)[name = string("linear_4_cast_fp16")]; tensor hidden_states_37_cast_fp16 = add(x = hidden_states_33_cast_fp16, y = linear_4_cast_fp16)[name = string("hidden_states_37_cast_fp16")]; tensor mean_19_axes_0 = const()[name = string("mean_19_axes_0"), val = tensor([-1])]; bool mean_19_keep_dims_0 = const()[name = string("mean_19_keep_dims_0"), val = bool(true)]; tensor mean_19_cast_fp16 = reduce_mean(axes = mean_19_axes_0, keep_dims = mean_19_keep_dims_0, x = hidden_states_37_cast_fp16)[name = string("mean_19_cast_fp16")]; tensor input_63_cast_fp16 = sub(x = hidden_states_37_cast_fp16, y = mean_19_cast_fp16)[name = string("input_63_cast_fp16")]; tensor var_1157_axes_0 = const()[name = string("op_1157_axes_0"), val = tensor([-1])]; tensor model_model_layers_4_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_4_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271310272)))]; tensor var_1157_cast_fp16 = layer_norm(axes = var_1157_axes_0, epsilon = var_44_to_fp16, gamma = model_model_layers_4_post_attention_layernorm_weight_to_fp16, x = input_63_cast_fp16)[name = string("op_1157_cast_fp16")]; tensor var_1164 = const()[name = string("op_1164"), val = tensor([0, 2, 1])]; tensor input_65_axes_0 = const()[name = string("input_65_axes_0"), val = tensor([2])]; tensor var_1165 = transpose(perm = var_1164, x = var_1157_cast_fp16)[name = string("transpose_13")]; tensor input_65 = expand_dims(axes = input_65_axes_0, x = var_1165)[name = string("input_65")]; string input_67_pad_type_0 = const()[name = string("input_67_pad_type_0"), val = string("valid")]; tensor input_67_strides_0 = const()[name = string("input_67_strides_0"), val = tensor([1, 1])]; tensor input_67_pad_0 = const()[name = string("input_67_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_67_dilations_0 = const()[name = string("input_67_dilations_0"), val = tensor([1, 1])]; int32 input_67_groups_0 = const()[name = string("input_67_groups_0"), val = int32(1)]; tensor input_67 = conv(dilations = input_67_dilations_0, groups = input_67_groups_0, pad = input_67_pad_0, pad_type = input_67_pad_type_0, strides = input_67_strides_0, weight = model_model_layers_4_mlp_gate_proj_weight_palettized, x = input_65)[name = string("input_67")]; string up_states_9_pad_type_0 = const()[name = string("up_states_9_pad_type_0"), val = string("valid")]; tensor up_states_9_strides_0 = const()[name = string("up_states_9_strides_0"), val = tensor([1, 1])]; tensor up_states_9_pad_0 = const()[name = string("up_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_9_dilations_0 = const()[name = string("up_states_9_dilations_0"), val = tensor([1, 1])]; int32 up_states_9_groups_0 = const()[name = string("up_states_9_groups_0"), val = int32(1)]; tensor up_states_9 = conv(dilations = up_states_9_dilations_0, groups = up_states_9_groups_0, pad = up_states_9_pad_0, pad_type = up_states_9_pad_type_0, strides = up_states_9_strides_0, weight = model_model_layers_4_mlp_up_proj_weight_palettized, x = input_65)[name = string("up_states_9")]; tensor gate_states_9 = silu(x = input_67)[name = string("gate_states_9")]; tensor input_69 = mul(x = gate_states_9, y = up_states_9)[name = string("input_69")]; string hidden_states_39_pad_type_0 = const()[name = string("hidden_states_39_pad_type_0"), val = string("valid")]; tensor hidden_states_39_strides_0 = const()[name = string("hidden_states_39_strides_0"), val = tensor([1, 1])]; tensor hidden_states_39_pad_0 = const()[name = string("hidden_states_39_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_39_dilations_0 = const()[name = string("hidden_states_39_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_39_groups_0 = const()[name = string("hidden_states_39_groups_0"), val = int32(1)]; tensor hidden_states_39 = conv(dilations = hidden_states_39_dilations_0, groups = hidden_states_39_groups_0, pad = hidden_states_39_pad_0, pad_type = hidden_states_39_pad_type_0, strides = hidden_states_39_strides_0, weight = model_model_layers_4_mlp_down_proj_weight_palettized, x = input_69)[name = string("hidden_states_39")]; tensor var_1187_axes_0 = const()[name = string("op_1187_axes_0"), val = tensor([2])]; tensor var_1187 = squeeze(axes = var_1187_axes_0, x = hidden_states_39)[name = string("op_1187")]; tensor var_1188 = const()[name = string("op_1188"), val = tensor([0, 2, 1])]; tensor var_1189 = transpose(perm = var_1188, x = var_1187)[name = string("transpose_12")]; tensor hidden_states_41_cast_fp16 = add(x = hidden_states_37_cast_fp16, y = var_1189)[name = string("hidden_states_41_cast_fp16")]; tensor mean_21_axes_0 = const()[name = string("mean_21_axes_0"), val = tensor([-1])]; bool mean_21_keep_dims_0 = const()[name = string("mean_21_keep_dims_0"), val = bool(true)]; tensor mean_21_cast_fp16 = reduce_mean(axes = mean_21_axes_0, keep_dims = mean_21_keep_dims_0, x = hidden_states_41_cast_fp16)[name = string("mean_21_cast_fp16")]; tensor input_71_cast_fp16 = sub(x = hidden_states_41_cast_fp16, y = mean_21_cast_fp16)[name = string("input_71_cast_fp16")]; tensor var_1197_axes_0 = const()[name = string("op_1197_axes_0"), val = tensor([-1])]; tensor model_model_layers_5_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_5_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271314432)))]; tensor var_1197_cast_fp16 = layer_norm(axes = var_1197_axes_0, epsilon = var_44_to_fp16, gamma = model_model_layers_5_input_layernorm_weight_to_fp16, x = input_71_cast_fp16)[name = string("op_1197_cast_fp16")]; tensor var_1200 = const()[name = string("op_1200"), val = tensor([0, 2, 1])]; tensor var_1202_axes_0 = const()[name = string("op_1202_axes_0"), val = tensor([2])]; tensor var_1201 = transpose(perm = var_1200, x = var_1197_cast_fp16)[name = string("transpose_11")]; tensor var_1202 = expand_dims(axes = var_1202_axes_0, x = var_1201)[name = string("op_1202")]; string var_1209_pad_type_0 = const()[name = string("op_1209_pad_type_0"), val = string("valid")]; tensor var_1209_strides_0 = const()[name = string("op_1209_strides_0"), val = tensor([1, 1])]; tensor var_1209_pad_0 = const()[name = string("op_1209_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1209_dilations_0 = const()[name = string("op_1209_dilations_0"), val = tensor([1, 1])]; int32 var_1209_groups_0 = const()[name = string("op_1209_groups_0"), val = int32(1)]; tensor var_1209 = conv(dilations = var_1209_dilations_0, groups = var_1209_groups_0, pad = var_1209_pad_0, pad_type = var_1209_pad_type_0, strides = var_1209_strides_0, weight = model_model_layers_5_self_attn_q_proj_weight_palettized, x = var_1202)[name = string("op_1209")]; tensor var_1210 = const()[name = string("op_1210"), val = tensor([1, 32, 1, 64])]; tensor var_1211 = reshape(shape = var_1210, x = var_1209)[name = string("op_1211")]; string var_1218_pad_type_0 = const()[name = string("op_1218_pad_type_0"), val = string("valid")]; tensor var_1218_strides_0 = const()[name = string("op_1218_strides_0"), val = tensor([1, 1])]; tensor var_1218_pad_0 = const()[name = string("op_1218_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1218_dilations_0 = const()[name = string("op_1218_dilations_0"), val = tensor([1, 1])]; int32 var_1218_groups_0 = const()[name = string("op_1218_groups_0"), val = int32(1)]; tensor var_1218 = conv(dilations = var_1218_dilations_0, groups = var_1218_groups_0, pad = var_1218_pad_0, pad_type = var_1218_pad_type_0, strides = var_1218_strides_0, weight = model_model_layers_5_self_attn_k_proj_weight_palettized, x = var_1202)[name = string("op_1218")]; tensor var_1219 = const()[name = string("op_1219"), val = tensor([1, 8, 1, 64])]; tensor var_1220 = reshape(shape = var_1219, x = var_1218)[name = string("op_1220")]; string var_1227_pad_type_0 = const()[name = string("op_1227_pad_type_0"), val = string("valid")]; tensor var_1227_strides_0 = const()[name = string("op_1227_strides_0"), val = tensor([1, 1])]; tensor var_1227_pad_0 = const()[name = string("op_1227_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1227_dilations_0 = const()[name = string("op_1227_dilations_0"), val = tensor([1, 1])]; int32 var_1227_groups_0 = const()[name = string("op_1227_groups_0"), val = int32(1)]; tensor var_1227 = conv(dilations = var_1227_dilations_0, groups = var_1227_groups_0, pad = var_1227_pad_0, pad_type = var_1227_pad_type_0, strides = var_1227_strides_0, weight = model_model_layers_5_self_attn_v_proj_weight_palettized, x = var_1202)[name = string("op_1227")]; tensor var_1228 = const()[name = string("op_1228"), val = tensor([1, 8, 1, 64])]; tensor var_1229 = reshape(shape = var_1228, x = var_1227)[name = string("op_1229")]; tensor x1_21_begin_0 = const()[name = string("x1_21_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_21_end_0 = const()[name = string("x1_21_end_0"), val = tensor([1, 32, 1, 32])]; tensor x1_21_end_mask_0 = const()[name = string("x1_21_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_21 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = var_1211)[name = string("x1_21")]; tensor x2_21_begin_0 = const()[name = string("x2_21_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_21_end_0 = const()[name = string("x2_21_end_0"), val = tensor([1, 32, 1, 64])]; tensor x2_21_end_mask_0 = const()[name = string("x2_21_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_21 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = var_1211)[name = string("x2_21")]; tensor var_1243_cast_fp16 = mul(x = x1_21, y = cos_3_cast_fp16)[name = string("op_1243_cast_fp16")]; tensor var_1244_cast_fp16 = mul(x = x2_21, y = sin_3_cast_fp16)[name = string("op_1244_cast_fp16")]; tensor var_1245_cast_fp16 = sub(x = var_1243_cast_fp16, y = var_1244_cast_fp16)[name = string("op_1245_cast_fp16")]; tensor var_1246_cast_fp16 = mul(x = x2_21, y = cos_3_cast_fp16)[name = string("op_1246_cast_fp16")]; tensor var_1247_cast_fp16 = mul(x = x1_21, y = sin_3_cast_fp16)[name = string("op_1247_cast_fp16")]; tensor var_1248_cast_fp16 = add(x = var_1246_cast_fp16, y = var_1247_cast_fp16)[name = string("op_1248_cast_fp16")]; bool rotated_21_interleave_0 = const()[name = string("rotated_21_interleave_0"), val = bool(false)]; tensor rotated_21_cast_fp16 = concat(axis = var_49, interleave = rotated_21_interleave_0, values = (var_1245_cast_fp16, var_1248_cast_fp16))[name = string("rotated_21_cast_fp16")]; tensor x1_23_begin_0 = const()[name = string("x1_23_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_23_end_0 = const()[name = string("x1_23_end_0"), val = tensor([1, 8, 1, 32])]; tensor x1_23_end_mask_0 = const()[name = string("x1_23_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_23 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = var_1220)[name = string("x1_23")]; tensor x2_23_begin_0 = const()[name = string("x2_23_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_23_end_0 = const()[name = string("x2_23_end_0"), val = tensor([1, 8, 1, 64])]; tensor x2_23_end_mask_0 = const()[name = string("x2_23_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_23 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = var_1220)[name = string("x2_23")]; tensor var_1264_cast_fp16 = mul(x = x1_23, y = cos_3_cast_fp16)[name = string("op_1264_cast_fp16")]; tensor var_1265_cast_fp16 = mul(x = x2_23, y = sin_3_cast_fp16)[name = string("op_1265_cast_fp16")]; tensor var_1266_cast_fp16 = sub(x = var_1264_cast_fp16, y = var_1265_cast_fp16)[name = string("op_1266_cast_fp16")]; tensor var_1267_cast_fp16 = mul(x = x2_23, y = cos_3_cast_fp16)[name = string("op_1267_cast_fp16")]; tensor var_1268_cast_fp16 = mul(x = x1_23, y = sin_3_cast_fp16)[name = string("op_1268_cast_fp16")]; tensor var_1269_cast_fp16 = add(x = var_1267_cast_fp16, y = var_1268_cast_fp16)[name = string("op_1269_cast_fp16")]; bool rotated_23_interleave_0 = const()[name = string("rotated_23_interleave_0"), val = bool(false)]; tensor rotated_23_cast_fp16 = concat(axis = var_49, interleave = rotated_23_interleave_0, values = (var_1266_cast_fp16, var_1269_cast_fp16))[name = string("rotated_23_cast_fp16")]; tensor expand_dims_60 = const()[name = string("expand_dims_60"), val = tensor([5])]; tensor expand_dims_61 = const()[name = string("expand_dims_61"), val = tensor([0])]; tensor expand_dims_63 = const()[name = string("expand_dims_63"), val = tensor([0])]; tensor expand_dims_64 = const()[name = string("expand_dims_64"), val = tensor([6])]; int32 concat_42_axis_0 = const()[name = string("concat_42_axis_0"), val = int32(0)]; bool concat_42_interleave_0 = const()[name = string("concat_42_interleave_0"), val = bool(false)]; tensor concat_42 = concat(axis = concat_42_axis_0, interleave = concat_42_interleave_0, values = (expand_dims_60, expand_dims_61, current_pos, expand_dims_63))[name = string("concat_42")]; tensor concat_43_values1_0 = const()[name = string("concat_43_values1_0"), val = tensor([0])]; tensor concat_43_values3_0 = const()[name = string("concat_43_values3_0"), val = tensor([0])]; int32 concat_43_axis_0 = const()[name = string("concat_43_axis_0"), val = int32(0)]; bool concat_43_interleave_0 = const()[name = string("concat_43_interleave_0"), val = bool(false)]; tensor concat_43 = concat(axis = concat_43_axis_0, interleave = concat_43_interleave_0, values = (expand_dims_64, concat_43_values1_0, var_349, concat_43_values3_0))[name = string("concat_43")]; tensor model_model_kv_cache_0_internal_tensor_assign_11_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_42, begin_mask = model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0, end = concat_43, end_mask = model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_11_stride_0, update = rotated_23_cast_fp16, x = coreml_update_state_25)[name = string("model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_10_write_state")]; tensor coreml_update_state_26 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_10")]; tensor expand_dims_66 = const()[name = string("expand_dims_66"), val = tensor([21])]; tensor expand_dims_67 = const()[name = string("expand_dims_67"), val = tensor([0])]; tensor expand_dims_69 = const()[name = string("expand_dims_69"), val = tensor([0])]; tensor expand_dims_70 = const()[name = string("expand_dims_70"), val = tensor([22])]; int32 concat_46_axis_0 = const()[name = string("concat_46_axis_0"), val = int32(0)]; bool concat_46_interleave_0 = const()[name = string("concat_46_interleave_0"), val = bool(false)]; tensor concat_46 = concat(axis = concat_46_axis_0, interleave = concat_46_interleave_0, values = (expand_dims_66, expand_dims_67, current_pos, expand_dims_69))[name = string("concat_46")]; tensor concat_47_values1_0 = const()[name = string("concat_47_values1_0"), val = tensor([0])]; tensor concat_47_values3_0 = const()[name = string("concat_47_values3_0"), val = tensor([0])]; int32 concat_47_axis_0 = const()[name = string("concat_47_axis_0"), val = int32(0)]; bool concat_47_interleave_0 = const()[name = string("concat_47_interleave_0"), val = bool(false)]; tensor concat_47 = concat(axis = concat_47_axis_0, interleave = concat_47_interleave_0, values = (expand_dims_70, concat_47_values1_0, var_349, concat_47_values3_0))[name = string("concat_47")]; tensor model_model_kv_cache_0_internal_tensor_assign_12_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_46, begin_mask = model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0, end = concat_47, end_mask = model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_12_stride_0, update = var_1229, x = coreml_update_state_26)[name = string("model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_11_write_state")]; tensor coreml_update_state_27 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_11")]; tensor var_1289_begin_0 = const()[name = string("op_1289_begin_0"), val = tensor([5, 0, 0, 0])]; tensor var_1289_end_0 = const()[name = string("op_1289_end_0"), val = tensor([6, 8, 2048, 64])]; tensor var_1289_end_mask_0 = const()[name = string("op_1289_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1289_cast_fp16 = slice_by_index(begin = var_1289_begin_0, end = var_1289_end_0, end_mask = var_1289_end_mask_0, x = coreml_update_state_27)[name = string("op_1289_cast_fp16")]; tensor K_layer_cache_11_axes_0 = const()[name = string("K_layer_cache_11_axes_0"), val = tensor([0])]; tensor K_layer_cache_11_cast_fp16 = squeeze(axes = K_layer_cache_11_axes_0, x = var_1289_cast_fp16)[name = string("K_layer_cache_11_cast_fp16")]; tensor var_1291_begin_0 = const()[name = string("op_1291_begin_0"), val = tensor([21, 0, 0, 0])]; tensor var_1291_end_0 = const()[name = string("op_1291_end_0"), val = tensor([22, 8, 2048, 64])]; tensor var_1291_end_mask_0 = const()[name = string("op_1291_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1291_cast_fp16 = slice_by_index(begin = var_1291_begin_0, end = var_1291_end_0, end_mask = var_1291_end_mask_0, x = coreml_update_state_27)[name = string("op_1291_cast_fp16")]; tensor V_layer_cache_11_axes_0 = const()[name = string("V_layer_cache_11_axes_0"), val = tensor([0])]; tensor V_layer_cache_11_cast_fp16 = squeeze(axes = V_layer_cache_11_axes_0, x = var_1291_cast_fp16)[name = string("V_layer_cache_11_cast_fp16")]; tensor x_151_axes_0 = const()[name = string("x_151_axes_0"), val = tensor([1])]; tensor x_151_cast_fp16 = expand_dims(axes = x_151_axes_0, x = K_layer_cache_11_cast_fp16)[name = string("x_151_cast_fp16")]; tensor var_1300 = const()[name = string("op_1300"), val = tensor([1, 4, 1, 1])]; tensor x_153_cast_fp16 = tile(reps = var_1300, x = x_151_cast_fp16)[name = string("x_153_cast_fp16")]; tensor var_1304 = const()[name = string("op_1304"), val = tensor([1, -1, 2048, 64])]; tensor key_states_23_cast_fp16 = reshape(shape = var_1304, x = x_153_cast_fp16)[name = string("key_states_23_cast_fp16")]; tensor x_157_axes_0 = const()[name = string("x_157_axes_0"), val = tensor([1])]; tensor x_157_cast_fp16 = expand_dims(axes = x_157_axes_0, x = V_layer_cache_11_cast_fp16)[name = string("x_157_cast_fp16")]; tensor var_1307 = const()[name = string("op_1307"), val = tensor([1, 4, 1, 1])]; tensor x_159_cast_fp16 = tile(reps = var_1307, x = x_157_cast_fp16)[name = string("x_159_cast_fp16")]; tensor var_1311 = const()[name = string("op_1311"), val = tensor([1, -1, 2048, 64])]; tensor value_states_23_cast_fp16 = reshape(shape = var_1311, x = x_159_cast_fp16)[name = string("value_states_23_cast_fp16")]; bool var_1314_transpose_x_1 = const()[name = string("op_1314_transpose_x_1"), val = bool(false)]; bool var_1314_transpose_y_1 = const()[name = string("op_1314_transpose_y_1"), val = bool(true)]; tensor var_1314_cast_fp16 = matmul(transpose_x = var_1314_transpose_x_1, transpose_y = var_1314_transpose_y_1, x = rotated_21_cast_fp16, y = key_states_23_cast_fp16)[name = string("op_1314_cast_fp16")]; fp16 var_1315_to_fp16 = const()[name = string("op_1315_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_21_cast_fp16 = mul(x = var_1314_cast_fp16, y = var_1315_to_fp16)[name = string("attn_weights_21_cast_fp16")]; tensor x_161_cast_fp16 = add(x = attn_weights_21_cast_fp16, y = causal_mask)[name = string("x_161_cast_fp16")]; tensor reduce_max_5_axes_0 = const()[name = string("reduce_max_5_axes_0"), val = tensor([-1])]; bool reduce_max_5_keep_dims_0 = const()[name = string("reduce_max_5_keep_dims_0"), val = bool(true)]; tensor reduce_max_5_cast_fp16 = reduce_max(axes = reduce_max_5_axes_0, keep_dims = reduce_max_5_keep_dims_0, x = x_161_cast_fp16)[name = string("reduce_max_5_cast_fp16")]; tensor x_163_cast_fp16 = sub(x = x_161_cast_fp16, y = reduce_max_5_cast_fp16)[name = string("x_163_cast_fp16")]; tensor exp_x_11_cast_fp16 = exp(x = x_163_cast_fp16)[name = string("exp_x_11_cast_fp16")]; tensor var_1326_axes_0 = const()[name = string("op_1326_axes_0"), val = tensor([-1])]; bool var_1326_keep_dims_0 = const()[name = string("op_1326_keep_dims_0"), val = bool(true)]; tensor var_1326_cast_fp16 = reduce_sum(axes = var_1326_axes_0, keep_dims = var_1326_keep_dims_0, x = exp_x_11_cast_fp16)[name = string("op_1326_cast_fp16")]; tensor attn_weights_23_cast_fp16 = real_div(x = exp_x_11_cast_fp16, y = var_1326_cast_fp16)[name = string("attn_weights_23_cast_fp16")]; bool attn_output_31_transpose_x_0 = const()[name = string("attn_output_31_transpose_x_0"), val = bool(false)]; bool attn_output_31_transpose_y_0 = const()[name = string("attn_output_31_transpose_y_0"), val = bool(false)]; tensor attn_output_31_cast_fp16 = matmul(transpose_x = attn_output_31_transpose_x_0, transpose_y = attn_output_31_transpose_y_0, x = attn_weights_23_cast_fp16, y = value_states_23_cast_fp16)[name = string("attn_output_31_cast_fp16")]; tensor var_1329_perm_0 = const()[name = string("op_1329_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1331 = const()[name = string("op_1331"), val = tensor([1, 1, 2048])]; tensor var_1329_cast_fp16 = transpose(perm = var_1329_perm_0, x = attn_output_31_cast_fp16)[name = string("transpose_10")]; tensor input_75_cast_fp16 = reshape(shape = var_1331, x = var_1329_cast_fp16)[name = string("input_75_cast_fp16")]; tensor model_model_layers_5_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271318592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273415808))))[name = string("model_model_layers_5_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_5_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_5_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_75_cast_fp16)[name = string("linear_5_cast_fp16")]; tensor hidden_states_45_cast_fp16 = add(x = hidden_states_41_cast_fp16, y = linear_5_cast_fp16)[name = string("hidden_states_45_cast_fp16")]; tensor mean_23_axes_0 = const()[name = string("mean_23_axes_0"), val = tensor([-1])]; bool mean_23_keep_dims_0 = const()[name = string("mean_23_keep_dims_0"), val = bool(true)]; tensor mean_23_cast_fp16 = reduce_mean(axes = mean_23_axes_0, keep_dims = mean_23_keep_dims_0, x = hidden_states_45_cast_fp16)[name = string("mean_23_cast_fp16")]; tensor input_77_cast_fp16 = sub(x = hidden_states_45_cast_fp16, y = mean_23_cast_fp16)[name = string("input_77_cast_fp16")]; tensor var_1342_axes_0 = const()[name = string("op_1342_axes_0"), val = tensor([-1])]; tensor model_model_layers_5_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_5_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273424064)))]; tensor var_1342_cast_fp16 = layer_norm(axes = var_1342_axes_0, epsilon = var_44_to_fp16, gamma = model_model_layers_5_post_attention_layernorm_weight_to_fp16, x = input_77_cast_fp16)[name = string("op_1342_cast_fp16")]; tensor var_1349 = const()[name = string("op_1349"), val = tensor([0, 2, 1])]; tensor input_79_axes_0 = const()[name = string("input_79_axes_0"), val = tensor([2])]; tensor var_1350 = transpose(perm = var_1349, x = var_1342_cast_fp16)[name = string("transpose_9")]; tensor input_79 = expand_dims(axes = input_79_axes_0, x = var_1350)[name = string("input_79")]; string input_81_pad_type_0 = const()[name = string("input_81_pad_type_0"), val = string("valid")]; tensor input_81_strides_0 = const()[name = string("input_81_strides_0"), val = tensor([1, 1])]; tensor input_81_pad_0 = const()[name = string("input_81_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_81_dilations_0 = const()[name = string("input_81_dilations_0"), val = tensor([1, 1])]; int32 input_81_groups_0 = const()[name = string("input_81_groups_0"), val = int32(1)]; tensor input_81 = conv(dilations = input_81_dilations_0, groups = input_81_groups_0, pad = input_81_pad_0, pad_type = input_81_pad_type_0, strides = input_81_strides_0, weight = model_model_layers_5_mlp_gate_proj_weight_palettized, x = input_79)[name = string("input_81")]; string up_states_11_pad_type_0 = const()[name = string("up_states_11_pad_type_0"), val = string("valid")]; tensor up_states_11_strides_0 = const()[name = string("up_states_11_strides_0"), val = tensor([1, 1])]; tensor up_states_11_pad_0 = const()[name = string("up_states_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_11_dilations_0 = const()[name = string("up_states_11_dilations_0"), val = tensor([1, 1])]; int32 up_states_11_groups_0 = const()[name = string("up_states_11_groups_0"), val = int32(1)]; tensor up_states_11 = conv(dilations = up_states_11_dilations_0, groups = up_states_11_groups_0, pad = up_states_11_pad_0, pad_type = up_states_11_pad_type_0, strides = up_states_11_strides_0, weight = model_model_layers_5_mlp_up_proj_weight_palettized, x = input_79)[name = string("up_states_11")]; tensor gate_states_11 = silu(x = input_81)[name = string("gate_states_11")]; tensor input_83 = mul(x = gate_states_11, y = up_states_11)[name = string("input_83")]; string hidden_states_47_pad_type_0 = const()[name = string("hidden_states_47_pad_type_0"), val = string("valid")]; tensor hidden_states_47_strides_0 = const()[name = string("hidden_states_47_strides_0"), val = tensor([1, 1])]; tensor hidden_states_47_pad_0 = const()[name = string("hidden_states_47_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_47_dilations_0 = const()[name = string("hidden_states_47_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_47_groups_0 = const()[name = string("hidden_states_47_groups_0"), val = int32(1)]; tensor hidden_states_47 = conv(dilations = hidden_states_47_dilations_0, groups = hidden_states_47_groups_0, pad = hidden_states_47_pad_0, pad_type = hidden_states_47_pad_type_0, strides = hidden_states_47_strides_0, weight = model_model_layers_5_mlp_down_proj_weight_palettized, x = input_83)[name = string("hidden_states_47")]; tensor var_1372_axes_0 = const()[name = string("op_1372_axes_0"), val = tensor([2])]; tensor var_1372 = squeeze(axes = var_1372_axes_0, x = hidden_states_47)[name = string("op_1372")]; tensor var_1373 = const()[name = string("op_1373"), val = tensor([0, 2, 1])]; tensor var_1374 = transpose(perm = var_1373, x = var_1372)[name = string("transpose_8")]; tensor hidden_states_49_cast_fp16 = add(x = hidden_states_45_cast_fp16, y = var_1374)[name = string("hidden_states_49_cast_fp16")]; tensor mean_25_axes_0 = const()[name = string("mean_25_axes_0"), val = tensor([-1])]; bool mean_25_keep_dims_0 = const()[name = string("mean_25_keep_dims_0"), val = bool(true)]; tensor mean_25_cast_fp16 = reduce_mean(axes = mean_25_axes_0, keep_dims = mean_25_keep_dims_0, x = hidden_states_49_cast_fp16)[name = string("mean_25_cast_fp16")]; tensor input_85_cast_fp16 = sub(x = hidden_states_49_cast_fp16, y = mean_25_cast_fp16)[name = string("input_85_cast_fp16")]; tensor var_1382_axes_0 = const()[name = string("op_1382_axes_0"), val = tensor([-1])]; tensor model_model_layers_6_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_6_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273428224)))]; tensor var_1382_cast_fp16 = layer_norm(axes = var_1382_axes_0, epsilon = var_44_to_fp16, gamma = model_model_layers_6_input_layernorm_weight_to_fp16, x = input_85_cast_fp16)[name = string("op_1382_cast_fp16")]; tensor var_1385 = const()[name = string("op_1385"), val = tensor([0, 2, 1])]; tensor var_1387_axes_0 = const()[name = string("op_1387_axes_0"), val = tensor([2])]; tensor var_1386 = transpose(perm = var_1385, x = var_1382_cast_fp16)[name = string("transpose_7")]; tensor var_1387 = expand_dims(axes = var_1387_axes_0, x = var_1386)[name = string("op_1387")]; string var_1394_pad_type_0 = const()[name = string("op_1394_pad_type_0"), val = string("valid")]; tensor var_1394_strides_0 = const()[name = string("op_1394_strides_0"), val = tensor([1, 1])]; tensor var_1394_pad_0 = const()[name = string("op_1394_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1394_dilations_0 = const()[name = string("op_1394_dilations_0"), val = tensor([1, 1])]; int32 var_1394_groups_0 = const()[name = string("op_1394_groups_0"), val = int32(1)]; tensor var_1394 = conv(dilations = var_1394_dilations_0, groups = var_1394_groups_0, pad = var_1394_pad_0, pad_type = var_1394_pad_type_0, strides = var_1394_strides_0, weight = model_model_layers_6_self_attn_q_proj_weight_palettized, x = var_1387)[name = string("op_1394")]; tensor var_1395 = const()[name = string("op_1395"), val = tensor([1, 32, 1, 64])]; tensor var_1396 = reshape(shape = var_1395, x = var_1394)[name = string("op_1396")]; string var_1403_pad_type_0 = const()[name = string("op_1403_pad_type_0"), val = string("valid")]; tensor var_1403_strides_0 = const()[name = string("op_1403_strides_0"), val = tensor([1, 1])]; tensor var_1403_pad_0 = const()[name = string("op_1403_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1403_dilations_0 = const()[name = string("op_1403_dilations_0"), val = tensor([1, 1])]; int32 var_1403_groups_0 = const()[name = string("op_1403_groups_0"), val = int32(1)]; tensor var_1403 = conv(dilations = var_1403_dilations_0, groups = var_1403_groups_0, pad = var_1403_pad_0, pad_type = var_1403_pad_type_0, strides = var_1403_strides_0, weight = model_model_layers_6_self_attn_k_proj_weight_palettized, x = var_1387)[name = string("op_1403")]; tensor var_1404 = const()[name = string("op_1404"), val = tensor([1, 8, 1, 64])]; tensor var_1405 = reshape(shape = var_1404, x = var_1403)[name = string("op_1405")]; string var_1412_pad_type_0 = const()[name = string("op_1412_pad_type_0"), val = string("valid")]; tensor var_1412_strides_0 = const()[name = string("op_1412_strides_0"), val = tensor([1, 1])]; tensor var_1412_pad_0 = const()[name = string("op_1412_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1412_dilations_0 = const()[name = string("op_1412_dilations_0"), val = tensor([1, 1])]; int32 var_1412_groups_0 = const()[name = string("op_1412_groups_0"), val = int32(1)]; tensor var_1412 = conv(dilations = var_1412_dilations_0, groups = var_1412_groups_0, pad = var_1412_pad_0, pad_type = var_1412_pad_type_0, strides = var_1412_strides_0, weight = model_model_layers_6_self_attn_v_proj_weight_palettized, x = var_1387)[name = string("op_1412")]; tensor var_1413 = const()[name = string("op_1413"), val = tensor([1, 8, 1, 64])]; tensor var_1414 = reshape(shape = var_1413, x = var_1412)[name = string("op_1414")]; tensor x1_25_begin_0 = const()[name = string("x1_25_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_25_end_0 = const()[name = string("x1_25_end_0"), val = tensor([1, 32, 1, 32])]; tensor x1_25_end_mask_0 = const()[name = string("x1_25_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_25 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = var_1396)[name = string("x1_25")]; tensor x2_25_begin_0 = const()[name = string("x2_25_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_25_end_0 = const()[name = string("x2_25_end_0"), val = tensor([1, 32, 1, 64])]; tensor x2_25_end_mask_0 = const()[name = string("x2_25_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_25 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = var_1396)[name = string("x2_25")]; tensor var_1428_cast_fp16 = mul(x = x1_25, y = cos_3_cast_fp16)[name = string("op_1428_cast_fp16")]; tensor var_1429_cast_fp16 = mul(x = x2_25, y = sin_3_cast_fp16)[name = string("op_1429_cast_fp16")]; tensor var_1430_cast_fp16 = sub(x = var_1428_cast_fp16, y = var_1429_cast_fp16)[name = string("op_1430_cast_fp16")]; tensor var_1431_cast_fp16 = mul(x = x2_25, y = cos_3_cast_fp16)[name = string("op_1431_cast_fp16")]; tensor var_1432_cast_fp16 = mul(x = x1_25, y = sin_3_cast_fp16)[name = string("op_1432_cast_fp16")]; tensor var_1433_cast_fp16 = add(x = var_1431_cast_fp16, y = var_1432_cast_fp16)[name = string("op_1433_cast_fp16")]; bool rotated_25_interleave_0 = const()[name = string("rotated_25_interleave_0"), val = bool(false)]; tensor rotated_25_cast_fp16 = concat(axis = var_49, interleave = rotated_25_interleave_0, values = (var_1430_cast_fp16, var_1433_cast_fp16))[name = string("rotated_25_cast_fp16")]; tensor x1_27_begin_0 = const()[name = string("x1_27_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_27_end_0 = const()[name = string("x1_27_end_0"), val = tensor([1, 8, 1, 32])]; tensor x1_27_end_mask_0 = const()[name = string("x1_27_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_27 = slice_by_index(begin = x1_27_begin_0, end = x1_27_end_0, end_mask = x1_27_end_mask_0, x = var_1405)[name = string("x1_27")]; tensor x2_27_begin_0 = const()[name = string("x2_27_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_27_end_0 = const()[name = string("x2_27_end_0"), val = tensor([1, 8, 1, 64])]; tensor x2_27_end_mask_0 = const()[name = string("x2_27_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_27 = slice_by_index(begin = x2_27_begin_0, end = x2_27_end_0, end_mask = x2_27_end_mask_0, x = var_1405)[name = string("x2_27")]; tensor var_1449_cast_fp16 = mul(x = x1_27, y = cos_3_cast_fp16)[name = string("op_1449_cast_fp16")]; tensor var_1450_cast_fp16 = mul(x = x2_27, y = sin_3_cast_fp16)[name = string("op_1450_cast_fp16")]; tensor var_1451_cast_fp16 = sub(x = var_1449_cast_fp16, y = var_1450_cast_fp16)[name = string("op_1451_cast_fp16")]; tensor var_1452_cast_fp16 = mul(x = x2_27, y = cos_3_cast_fp16)[name = string("op_1452_cast_fp16")]; tensor var_1453_cast_fp16 = mul(x = x1_27, y = sin_3_cast_fp16)[name = string("op_1453_cast_fp16")]; tensor var_1454_cast_fp16 = add(x = var_1452_cast_fp16, y = var_1453_cast_fp16)[name = string("op_1454_cast_fp16")]; bool rotated_27_interleave_0 = const()[name = string("rotated_27_interleave_0"), val = bool(false)]; tensor rotated_27_cast_fp16 = concat(axis = var_49, interleave = rotated_27_interleave_0, values = (var_1451_cast_fp16, var_1454_cast_fp16))[name = string("rotated_27_cast_fp16")]; tensor expand_dims_72 = const()[name = string("expand_dims_72"), val = tensor([6])]; tensor expand_dims_73 = const()[name = string("expand_dims_73"), val = tensor([0])]; tensor expand_dims_75 = const()[name = string("expand_dims_75"), val = tensor([0])]; tensor expand_dims_76 = const()[name = string("expand_dims_76"), val = tensor([7])]; int32 concat_50_axis_0 = const()[name = string("concat_50_axis_0"), val = int32(0)]; bool concat_50_interleave_0 = const()[name = string("concat_50_interleave_0"), val = bool(false)]; tensor concat_50 = concat(axis = concat_50_axis_0, interleave = concat_50_interleave_0, values = (expand_dims_72, expand_dims_73, current_pos, expand_dims_75))[name = string("concat_50")]; tensor concat_51_values1_0 = const()[name = string("concat_51_values1_0"), val = tensor([0])]; tensor concat_51_values3_0 = const()[name = string("concat_51_values3_0"), val = tensor([0])]; int32 concat_51_axis_0 = const()[name = string("concat_51_axis_0"), val = int32(0)]; bool concat_51_interleave_0 = const()[name = string("concat_51_interleave_0"), val = bool(false)]; tensor concat_51 = concat(axis = concat_51_axis_0, interleave = concat_51_interleave_0, values = (expand_dims_76, concat_51_values1_0, var_349, concat_51_values3_0))[name = string("concat_51")]; tensor model_model_kv_cache_0_internal_tensor_assign_13_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_50, begin_mask = model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0, end = concat_51, end_mask = model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_13_stride_0, update = rotated_27_cast_fp16, x = coreml_update_state_27)[name = string("model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_12_write_state")]; tensor coreml_update_state_28 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_12")]; tensor expand_dims_78 = const()[name = string("expand_dims_78"), val = tensor([22])]; tensor expand_dims_79 = const()[name = string("expand_dims_79"), val = tensor([0])]; tensor expand_dims_81 = const()[name = string("expand_dims_81"), val = tensor([0])]; tensor expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor([23])]; int32 concat_54_axis_0 = const()[name = string("concat_54_axis_0"), val = int32(0)]; bool concat_54_interleave_0 = const()[name = string("concat_54_interleave_0"), val = bool(false)]; tensor concat_54 = concat(axis = concat_54_axis_0, interleave = concat_54_interleave_0, values = (expand_dims_78, expand_dims_79, current_pos, expand_dims_81))[name = string("concat_54")]; tensor concat_55_values1_0 = const()[name = string("concat_55_values1_0"), val = tensor([0])]; tensor concat_55_values3_0 = const()[name = string("concat_55_values3_0"), val = tensor([0])]; int32 concat_55_axis_0 = const()[name = string("concat_55_axis_0"), val = int32(0)]; bool concat_55_interleave_0 = const()[name = string("concat_55_interleave_0"), val = bool(false)]; tensor concat_55 = concat(axis = concat_55_axis_0, interleave = concat_55_interleave_0, values = (expand_dims_82, concat_55_values1_0, var_349, concat_55_values3_0))[name = string("concat_55")]; tensor model_model_kv_cache_0_internal_tensor_assign_14_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_54, begin_mask = model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0, end = concat_55, end_mask = model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_14_stride_0, update = var_1414, x = coreml_update_state_28)[name = string("model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_13_write_state")]; tensor coreml_update_state_29 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_13")]; tensor var_1474_begin_0 = const()[name = string("op_1474_begin_0"), val = tensor([6, 0, 0, 0])]; tensor var_1474_end_0 = const()[name = string("op_1474_end_0"), val = tensor([7, 8, 2048, 64])]; tensor var_1474_end_mask_0 = const()[name = string("op_1474_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1474_cast_fp16 = slice_by_index(begin = var_1474_begin_0, end = var_1474_end_0, end_mask = var_1474_end_mask_0, x = coreml_update_state_29)[name = string("op_1474_cast_fp16")]; tensor K_layer_cache_13_axes_0 = const()[name = string("K_layer_cache_13_axes_0"), val = tensor([0])]; tensor K_layer_cache_13_cast_fp16 = squeeze(axes = K_layer_cache_13_axes_0, x = var_1474_cast_fp16)[name = string("K_layer_cache_13_cast_fp16")]; tensor var_1476_begin_0 = const()[name = string("op_1476_begin_0"), val = tensor([22, 0, 0, 0])]; tensor var_1476_end_0 = const()[name = string("op_1476_end_0"), val = tensor([23, 8, 2048, 64])]; tensor var_1476_end_mask_0 = const()[name = string("op_1476_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1476_cast_fp16 = slice_by_index(begin = var_1476_begin_0, end = var_1476_end_0, end_mask = var_1476_end_mask_0, x = coreml_update_state_29)[name = string("op_1476_cast_fp16")]; tensor V_layer_cache_13_axes_0 = const()[name = string("V_layer_cache_13_axes_0"), val = tensor([0])]; tensor V_layer_cache_13_cast_fp16 = squeeze(axes = V_layer_cache_13_axes_0, x = var_1476_cast_fp16)[name = string("V_layer_cache_13_cast_fp16")]; tensor x_179_axes_0 = const()[name = string("x_179_axes_0"), val = tensor([1])]; tensor x_179_cast_fp16 = expand_dims(axes = x_179_axes_0, x = K_layer_cache_13_cast_fp16)[name = string("x_179_cast_fp16")]; tensor var_1485 = const()[name = string("op_1485"), val = tensor([1, 4, 1, 1])]; tensor x_181_cast_fp16 = tile(reps = var_1485, x = x_179_cast_fp16)[name = string("x_181_cast_fp16")]; tensor var_1489 = const()[name = string("op_1489"), val = tensor([1, -1, 2048, 64])]; tensor key_states_27_cast_fp16 = reshape(shape = var_1489, x = x_181_cast_fp16)[name = string("key_states_27_cast_fp16")]; tensor x_185_axes_0 = const()[name = string("x_185_axes_0"), val = tensor([1])]; tensor x_185_cast_fp16 = expand_dims(axes = x_185_axes_0, x = V_layer_cache_13_cast_fp16)[name = string("x_185_cast_fp16")]; tensor var_1492 = const()[name = string("op_1492"), val = tensor([1, 4, 1, 1])]; tensor x_187_cast_fp16 = tile(reps = var_1492, x = x_185_cast_fp16)[name = string("x_187_cast_fp16")]; tensor var_1496 = const()[name = string("op_1496"), val = tensor([1, -1, 2048, 64])]; tensor value_states_27_cast_fp16 = reshape(shape = var_1496, x = x_187_cast_fp16)[name = string("value_states_27_cast_fp16")]; bool var_1499_transpose_x_1 = const()[name = string("op_1499_transpose_x_1"), val = bool(false)]; bool var_1499_transpose_y_1 = const()[name = string("op_1499_transpose_y_1"), val = bool(true)]; tensor var_1499_cast_fp16 = matmul(transpose_x = var_1499_transpose_x_1, transpose_y = var_1499_transpose_y_1, x = rotated_25_cast_fp16, y = key_states_27_cast_fp16)[name = string("op_1499_cast_fp16")]; fp16 var_1500_to_fp16 = const()[name = string("op_1500_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_25_cast_fp16 = mul(x = var_1499_cast_fp16, y = var_1500_to_fp16)[name = string("attn_weights_25_cast_fp16")]; tensor x_189_cast_fp16 = add(x = attn_weights_25_cast_fp16, y = causal_mask)[name = string("x_189_cast_fp16")]; tensor reduce_max_6_axes_0 = const()[name = string("reduce_max_6_axes_0"), val = tensor([-1])]; bool reduce_max_6_keep_dims_0 = const()[name = string("reduce_max_6_keep_dims_0"), val = bool(true)]; tensor reduce_max_6_cast_fp16 = reduce_max(axes = reduce_max_6_axes_0, keep_dims = reduce_max_6_keep_dims_0, x = x_189_cast_fp16)[name = string("reduce_max_6_cast_fp16")]; tensor x_191_cast_fp16 = sub(x = x_189_cast_fp16, y = reduce_max_6_cast_fp16)[name = string("x_191_cast_fp16")]; tensor exp_x_13_cast_fp16 = exp(x = x_191_cast_fp16)[name = string("exp_x_13_cast_fp16")]; tensor var_1511_axes_0 = const()[name = string("op_1511_axes_0"), val = tensor([-1])]; bool var_1511_keep_dims_0 = const()[name = string("op_1511_keep_dims_0"), val = bool(true)]; tensor var_1511_cast_fp16 = reduce_sum(axes = var_1511_axes_0, keep_dims = var_1511_keep_dims_0, x = exp_x_13_cast_fp16)[name = string("op_1511_cast_fp16")]; tensor attn_weights_27_cast_fp16 = real_div(x = exp_x_13_cast_fp16, y = var_1511_cast_fp16)[name = string("attn_weights_27_cast_fp16")]; bool attn_output_37_transpose_x_0 = const()[name = string("attn_output_37_transpose_x_0"), val = bool(false)]; bool attn_output_37_transpose_y_0 = const()[name = string("attn_output_37_transpose_y_0"), val = bool(false)]; tensor attn_output_37_cast_fp16 = matmul(transpose_x = attn_output_37_transpose_x_0, transpose_y = attn_output_37_transpose_y_0, x = attn_weights_27_cast_fp16, y = value_states_27_cast_fp16)[name = string("attn_output_37_cast_fp16")]; tensor var_1514_perm_0 = const()[name = string("op_1514_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1516 = const()[name = string("op_1516"), val = tensor([1, 1, 2048])]; tensor var_1514_cast_fp16 = transpose(perm = var_1514_perm_0, x = attn_output_37_cast_fp16)[name = string("transpose_6")]; tensor input_89_cast_fp16 = reshape(shape = var_1516, x = var_1514_cast_fp16)[name = string("input_89_cast_fp16")]; tensor model_model_layers_6_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273432384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275529600))))[name = string("model_model_layers_6_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_6_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_6_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_89_cast_fp16)[name = string("linear_6_cast_fp16")]; tensor hidden_states_53_cast_fp16 = add(x = hidden_states_49_cast_fp16, y = linear_6_cast_fp16)[name = string("hidden_states_53_cast_fp16")]; tensor mean_27_axes_0 = const()[name = string("mean_27_axes_0"), val = tensor([-1])]; bool mean_27_keep_dims_0 = const()[name = string("mean_27_keep_dims_0"), val = bool(true)]; tensor mean_27_cast_fp16 = reduce_mean(axes = mean_27_axes_0, keep_dims = mean_27_keep_dims_0, x = hidden_states_53_cast_fp16)[name = string("mean_27_cast_fp16")]; tensor input_91_cast_fp16 = sub(x = hidden_states_53_cast_fp16, y = mean_27_cast_fp16)[name = string("input_91_cast_fp16")]; tensor var_1527_axes_0 = const()[name = string("op_1527_axes_0"), val = tensor([-1])]; tensor model_model_layers_6_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_6_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275537856)))]; tensor var_1527_cast_fp16 = layer_norm(axes = var_1527_axes_0, epsilon = var_44_to_fp16, gamma = model_model_layers_6_post_attention_layernorm_weight_to_fp16, x = input_91_cast_fp16)[name = string("op_1527_cast_fp16")]; tensor var_1534 = const()[name = string("op_1534"), val = tensor([0, 2, 1])]; tensor input_93_axes_0 = const()[name = string("input_93_axes_0"), val = tensor([2])]; tensor var_1535 = transpose(perm = var_1534, x = var_1527_cast_fp16)[name = string("transpose_5")]; tensor input_93 = expand_dims(axes = input_93_axes_0, x = var_1535)[name = string("input_93")]; string input_95_pad_type_0 = const()[name = string("input_95_pad_type_0"), val = string("valid")]; tensor input_95_strides_0 = const()[name = string("input_95_strides_0"), val = tensor([1, 1])]; tensor input_95_pad_0 = const()[name = string("input_95_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_95_dilations_0 = const()[name = string("input_95_dilations_0"), val = tensor([1, 1])]; int32 input_95_groups_0 = const()[name = string("input_95_groups_0"), val = int32(1)]; tensor input_95 = conv(dilations = input_95_dilations_0, groups = input_95_groups_0, pad = input_95_pad_0, pad_type = input_95_pad_type_0, strides = input_95_strides_0, weight = model_model_layers_6_mlp_gate_proj_weight_palettized, x = input_93)[name = string("input_95")]; string up_states_13_pad_type_0 = const()[name = string("up_states_13_pad_type_0"), val = string("valid")]; tensor up_states_13_strides_0 = const()[name = string("up_states_13_strides_0"), val = tensor([1, 1])]; tensor up_states_13_pad_0 = const()[name = string("up_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_13_dilations_0 = const()[name = string("up_states_13_dilations_0"), val = tensor([1, 1])]; int32 up_states_13_groups_0 = const()[name = string("up_states_13_groups_0"), val = int32(1)]; tensor up_states_13 = conv(dilations = up_states_13_dilations_0, groups = up_states_13_groups_0, pad = up_states_13_pad_0, pad_type = up_states_13_pad_type_0, strides = up_states_13_strides_0, weight = model_model_layers_6_mlp_up_proj_weight_palettized, x = input_93)[name = string("up_states_13")]; tensor gate_states_13 = silu(x = input_95)[name = string("gate_states_13")]; tensor input_97 = mul(x = gate_states_13, y = up_states_13)[name = string("input_97")]; string hidden_states_55_pad_type_0 = const()[name = string("hidden_states_55_pad_type_0"), val = string("valid")]; tensor hidden_states_55_strides_0 = const()[name = string("hidden_states_55_strides_0"), val = tensor([1, 1])]; tensor hidden_states_55_pad_0 = const()[name = string("hidden_states_55_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_55_dilations_0 = const()[name = string("hidden_states_55_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_55_groups_0 = const()[name = string("hidden_states_55_groups_0"), val = int32(1)]; tensor hidden_states_55 = conv(dilations = hidden_states_55_dilations_0, groups = hidden_states_55_groups_0, pad = hidden_states_55_pad_0, pad_type = hidden_states_55_pad_type_0, strides = hidden_states_55_strides_0, weight = model_model_layers_6_mlp_down_proj_weight_palettized, x = input_97)[name = string("hidden_states_55")]; tensor var_1557_axes_0 = const()[name = string("op_1557_axes_0"), val = tensor([2])]; tensor var_1557 = squeeze(axes = var_1557_axes_0, x = hidden_states_55)[name = string("op_1557")]; tensor var_1558 = const()[name = string("op_1558"), val = tensor([0, 2, 1])]; tensor var_1559 = transpose(perm = var_1558, x = var_1557)[name = string("transpose_4")]; tensor hidden_states_57_cast_fp16 = add(x = hidden_states_53_cast_fp16, y = var_1559)[name = string("hidden_states_57_cast_fp16")]; tensor mean_29_axes_0 = const()[name = string("mean_29_axes_0"), val = tensor([-1])]; bool mean_29_keep_dims_0 = const()[name = string("mean_29_keep_dims_0"), val = bool(true)]; tensor mean_29_cast_fp16 = reduce_mean(axes = mean_29_axes_0, keep_dims = mean_29_keep_dims_0, x = hidden_states_57_cast_fp16)[name = string("mean_29_cast_fp16")]; tensor input_99_cast_fp16 = sub(x = hidden_states_57_cast_fp16, y = mean_29_cast_fp16)[name = string("input_99_cast_fp16")]; tensor var_1567_axes_0 = const()[name = string("op_1567_axes_0"), val = tensor([-1])]; tensor model_model_layers_7_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_7_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275542016)))]; tensor var_1567_cast_fp16 = layer_norm(axes = var_1567_axes_0, epsilon = var_44_to_fp16, gamma = model_model_layers_7_input_layernorm_weight_to_fp16, x = input_99_cast_fp16)[name = string("op_1567_cast_fp16")]; tensor var_1570 = const()[name = string("op_1570"), val = tensor([0, 2, 1])]; tensor var_1572_axes_0 = const()[name = string("op_1572_axes_0"), val = tensor([2])]; tensor var_1571 = transpose(perm = var_1570, x = var_1567_cast_fp16)[name = string("transpose_3")]; tensor var_1572 = expand_dims(axes = var_1572_axes_0, x = var_1571)[name = string("op_1572")]; string var_1579_pad_type_0 = const()[name = string("op_1579_pad_type_0"), val = string("valid")]; tensor var_1579_strides_0 = const()[name = string("op_1579_strides_0"), val = tensor([1, 1])]; tensor var_1579_pad_0 = const()[name = string("op_1579_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1579_dilations_0 = const()[name = string("op_1579_dilations_0"), val = tensor([1, 1])]; int32 var_1579_groups_0 = const()[name = string("op_1579_groups_0"), val = int32(1)]; tensor var_1579 = conv(dilations = var_1579_dilations_0, groups = var_1579_groups_0, pad = var_1579_pad_0, pad_type = var_1579_pad_type_0, strides = var_1579_strides_0, weight = model_model_layers_7_self_attn_q_proj_weight_palettized, x = var_1572)[name = string("op_1579")]; tensor var_1580 = const()[name = string("op_1580"), val = tensor([1, 32, 1, 64])]; tensor var_1581 = reshape(shape = var_1580, x = var_1579)[name = string("op_1581")]; string var_1588_pad_type_0 = const()[name = string("op_1588_pad_type_0"), val = string("valid")]; tensor var_1588_strides_0 = const()[name = string("op_1588_strides_0"), val = tensor([1, 1])]; tensor var_1588_pad_0 = const()[name = string("op_1588_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1588_dilations_0 = const()[name = string("op_1588_dilations_0"), val = tensor([1, 1])]; int32 var_1588_groups_0 = const()[name = string("op_1588_groups_0"), val = int32(1)]; tensor var_1588 = conv(dilations = var_1588_dilations_0, groups = var_1588_groups_0, pad = var_1588_pad_0, pad_type = var_1588_pad_type_0, strides = var_1588_strides_0, weight = model_model_layers_7_self_attn_k_proj_weight_palettized, x = var_1572)[name = string("op_1588")]; tensor var_1589 = const()[name = string("op_1589"), val = tensor([1, 8, 1, 64])]; tensor var_1590 = reshape(shape = var_1589, x = var_1588)[name = string("op_1590")]; string var_1597_pad_type_0 = const()[name = string("op_1597_pad_type_0"), val = string("valid")]; tensor var_1597_strides_0 = const()[name = string("op_1597_strides_0"), val = tensor([1, 1])]; tensor var_1597_pad_0 = const()[name = string("op_1597_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1597_dilations_0 = const()[name = string("op_1597_dilations_0"), val = tensor([1, 1])]; int32 var_1597_groups_0 = const()[name = string("op_1597_groups_0"), val = int32(1)]; tensor var_1597 = conv(dilations = var_1597_dilations_0, groups = var_1597_groups_0, pad = var_1597_pad_0, pad_type = var_1597_pad_type_0, strides = var_1597_strides_0, weight = model_model_layers_7_self_attn_v_proj_weight_palettized, x = var_1572)[name = string("op_1597")]; tensor var_1598 = const()[name = string("op_1598"), val = tensor([1, 8, 1, 64])]; tensor var_1599 = reshape(shape = var_1598, x = var_1597)[name = string("op_1599")]; tensor x1_29_begin_0 = const()[name = string("x1_29_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_29_end_0 = const()[name = string("x1_29_end_0"), val = tensor([1, 32, 1, 32])]; tensor x1_29_end_mask_0 = const()[name = string("x1_29_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_29 = slice_by_index(begin = x1_29_begin_0, end = x1_29_end_0, end_mask = x1_29_end_mask_0, x = var_1581)[name = string("x1_29")]; tensor x2_29_begin_0 = const()[name = string("x2_29_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_29_end_0 = const()[name = string("x2_29_end_0"), val = tensor([1, 32, 1, 64])]; tensor x2_29_end_mask_0 = const()[name = string("x2_29_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_29 = slice_by_index(begin = x2_29_begin_0, end = x2_29_end_0, end_mask = x2_29_end_mask_0, x = var_1581)[name = string("x2_29")]; tensor var_1613_cast_fp16 = mul(x = x1_29, y = cos_3_cast_fp16)[name = string("op_1613_cast_fp16")]; tensor var_1614_cast_fp16 = mul(x = x2_29, y = sin_3_cast_fp16)[name = string("op_1614_cast_fp16")]; tensor var_1615_cast_fp16 = sub(x = var_1613_cast_fp16, y = var_1614_cast_fp16)[name = string("op_1615_cast_fp16")]; tensor var_1616_cast_fp16 = mul(x = x2_29, y = cos_3_cast_fp16)[name = string("op_1616_cast_fp16")]; tensor var_1617_cast_fp16 = mul(x = x1_29, y = sin_3_cast_fp16)[name = string("op_1617_cast_fp16")]; tensor var_1618_cast_fp16 = add(x = var_1616_cast_fp16, y = var_1617_cast_fp16)[name = string("op_1618_cast_fp16")]; bool rotated_29_interleave_0 = const()[name = string("rotated_29_interleave_0"), val = bool(false)]; tensor rotated_29_cast_fp16 = concat(axis = var_49, interleave = rotated_29_interleave_0, values = (var_1615_cast_fp16, var_1618_cast_fp16))[name = string("rotated_29_cast_fp16")]; tensor x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_end_0 = const()[name = string("x1_end_0"), val = tensor([1, 8, 1, 32])]; tensor x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = var_1590)[name = string("x1")]; tensor x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_end_0 = const()[name = string("x2_end_0"), val = tensor([1, 8, 1, 64])]; tensor x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = var_1590)[name = string("x2")]; tensor var_1634_cast_fp16 = mul(x = x1, y = cos_3_cast_fp16)[name = string("op_1634_cast_fp16")]; tensor var_1635_cast_fp16 = mul(x = x2, y = sin_3_cast_fp16)[name = string("op_1635_cast_fp16")]; tensor var_1636_cast_fp16 = sub(x = var_1634_cast_fp16, y = var_1635_cast_fp16)[name = string("op_1636_cast_fp16")]; tensor var_1637_cast_fp16 = mul(x = x2, y = cos_3_cast_fp16)[name = string("op_1637_cast_fp16")]; tensor var_1638_cast_fp16 = mul(x = x1, y = sin_3_cast_fp16)[name = string("op_1638_cast_fp16")]; tensor var_1639_cast_fp16 = add(x = var_1637_cast_fp16, y = var_1638_cast_fp16)[name = string("op_1639_cast_fp16")]; bool rotated_interleave_0 = const()[name = string("rotated_interleave_0"), val = bool(false)]; tensor rotated_cast_fp16 = concat(axis = var_49, interleave = rotated_interleave_0, values = (var_1636_cast_fp16, var_1639_cast_fp16))[name = string("rotated_cast_fp16")]; tensor expand_dims_84 = const()[name = string("expand_dims_84"), val = tensor([7])]; tensor expand_dims_85 = const()[name = string("expand_dims_85"), val = tensor([0])]; tensor expand_dims_87 = const()[name = string("expand_dims_87"), val = tensor([0])]; tensor expand_dims_88 = const()[name = string("expand_dims_88"), val = tensor([8])]; int32 concat_58_axis_0 = const()[name = string("concat_58_axis_0"), val = int32(0)]; bool concat_58_interleave_0 = const()[name = string("concat_58_interleave_0"), val = bool(false)]; tensor concat_58 = concat(axis = concat_58_axis_0, interleave = concat_58_interleave_0, values = (expand_dims_84, expand_dims_85, current_pos, expand_dims_87))[name = string("concat_58")]; tensor concat_59_values1_0 = const()[name = string("concat_59_values1_0"), val = tensor([0])]; tensor concat_59_values3_0 = const()[name = string("concat_59_values3_0"), val = tensor([0])]; int32 concat_59_axis_0 = const()[name = string("concat_59_axis_0"), val = int32(0)]; bool concat_59_interleave_0 = const()[name = string("concat_59_interleave_0"), val = bool(false)]; tensor concat_59 = concat(axis = concat_59_axis_0, interleave = concat_59_interleave_0, values = (expand_dims_88, concat_59_values1_0, var_349, concat_59_values3_0))[name = string("concat_59")]; tensor model_model_kv_cache_0_internal_tensor_assign_15_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_58, begin_mask = model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0, end = concat_59, end_mask = model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_15_stride_0, update = rotated_cast_fp16, x = coreml_update_state_29)[name = string("model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_14_write_state")]; tensor coreml_update_state_30 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_14")]; tensor expand_dims_90 = const()[name = string("expand_dims_90"), val = tensor([23])]; tensor expand_dims_91 = const()[name = string("expand_dims_91"), val = tensor([0])]; tensor expand_dims_93 = const()[name = string("expand_dims_93"), val = tensor([0])]; tensor expand_dims_94 = const()[name = string("expand_dims_94"), val = tensor([24])]; int32 concat_62_axis_0 = const()[name = string("concat_62_axis_0"), val = int32(0)]; bool concat_62_interleave_0 = const()[name = string("concat_62_interleave_0"), val = bool(false)]; tensor concat_62 = concat(axis = concat_62_axis_0, interleave = concat_62_interleave_0, values = (expand_dims_90, expand_dims_91, current_pos, expand_dims_93))[name = string("concat_62")]; tensor concat_63_values1_0 = const()[name = string("concat_63_values1_0"), val = tensor([0])]; tensor concat_63_values3_0 = const()[name = string("concat_63_values3_0"), val = tensor([0])]; int32 concat_63_axis_0 = const()[name = string("concat_63_axis_0"), val = int32(0)]; bool concat_63_interleave_0 = const()[name = string("concat_63_interleave_0"), val = bool(false)]; tensor concat_63 = concat(axis = concat_63_axis_0, interleave = concat_63_interleave_0, values = (expand_dims_94, concat_63_values1_0, var_349, concat_63_values3_0))[name = string("concat_63")]; tensor model_model_kv_cache_0_internal_tensor_assign_16_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_62, begin_mask = model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0, end = concat_63, end_mask = model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_16_stride_0, update = var_1599, x = coreml_update_state_30)[name = string("model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_15_write_state")]; tensor coreml_update_state_31 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_15")]; tensor var_1659_begin_0 = const()[name = string("op_1659_begin_0"), val = tensor([7, 0, 0, 0])]; tensor var_1659_end_0 = const()[name = string("op_1659_end_0"), val = tensor([8, 8, 2048, 64])]; tensor var_1659_end_mask_0 = const()[name = string("op_1659_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1659_cast_fp16 = slice_by_index(begin = var_1659_begin_0, end = var_1659_end_0, end_mask = var_1659_end_mask_0, x = coreml_update_state_31)[name = string("op_1659_cast_fp16")]; tensor K_layer_cache_axes_0 = const()[name = string("K_layer_cache_axes_0"), val = tensor([0])]; tensor K_layer_cache_cast_fp16 = squeeze(axes = K_layer_cache_axes_0, x = var_1659_cast_fp16)[name = string("K_layer_cache_cast_fp16")]; tensor var_1661_begin_0 = const()[name = string("op_1661_begin_0"), val = tensor([23, 0, 0, 0])]; tensor var_1661_end_0 = const()[name = string("op_1661_end_0"), val = tensor([24, 8, 2048, 64])]; tensor var_1661_end_mask_0 = const()[name = string("op_1661_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1661_cast_fp16 = slice_by_index(begin = var_1661_begin_0, end = var_1661_end_0, end_mask = var_1661_end_mask_0, x = coreml_update_state_31)[name = string("op_1661_cast_fp16")]; tensor V_layer_cache_axes_0 = const()[name = string("V_layer_cache_axes_0"), val = tensor([0])]; tensor V_layer_cache_cast_fp16 = squeeze(axes = V_layer_cache_axes_0, x = var_1661_cast_fp16)[name = string("V_layer_cache_cast_fp16")]; tensor x_207_axes_0 = const()[name = string("x_207_axes_0"), val = tensor([1])]; tensor x_207_cast_fp16 = expand_dims(axes = x_207_axes_0, x = K_layer_cache_cast_fp16)[name = string("x_207_cast_fp16")]; tensor var_1670 = const()[name = string("op_1670"), val = tensor([1, 4, 1, 1])]; tensor x_209_cast_fp16 = tile(reps = var_1670, x = x_207_cast_fp16)[name = string("x_209_cast_fp16")]; tensor var_1674 = const()[name = string("op_1674"), val = tensor([1, -1, 2048, 64])]; tensor key_states_cast_fp16 = reshape(shape = var_1674, x = x_209_cast_fp16)[name = string("key_states_cast_fp16")]; tensor x_213_axes_0 = const()[name = string("x_213_axes_0"), val = tensor([1])]; tensor x_213_cast_fp16 = expand_dims(axes = x_213_axes_0, x = V_layer_cache_cast_fp16)[name = string("x_213_cast_fp16")]; tensor var_1677 = const()[name = string("op_1677"), val = tensor([1, 4, 1, 1])]; tensor x_215_cast_fp16 = tile(reps = var_1677, x = x_213_cast_fp16)[name = string("x_215_cast_fp16")]; tensor var_1681 = const()[name = string("op_1681"), val = tensor([1, -1, 2048, 64])]; tensor value_states_cast_fp16 = reshape(shape = var_1681, x = x_215_cast_fp16)[name = string("value_states_cast_fp16")]; bool var_1684_transpose_x_1 = const()[name = string("op_1684_transpose_x_1"), val = bool(false)]; bool var_1684_transpose_y_1 = const()[name = string("op_1684_transpose_y_1"), val = bool(true)]; tensor var_1684_cast_fp16 = matmul(transpose_x = var_1684_transpose_x_1, transpose_y = var_1684_transpose_y_1, x = rotated_29_cast_fp16, y = key_states_cast_fp16)[name = string("op_1684_cast_fp16")]; fp16 var_1685_to_fp16 = const()[name = string("op_1685_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_29_cast_fp16 = mul(x = var_1684_cast_fp16, y = var_1685_to_fp16)[name = string("attn_weights_29_cast_fp16")]; tensor x_217_cast_fp16 = add(x = attn_weights_29_cast_fp16, y = causal_mask)[name = string("x_217_cast_fp16")]; tensor reduce_max_7_axes_0 = const()[name = string("reduce_max_7_axes_0"), val = tensor([-1])]; bool reduce_max_7_keep_dims_0 = const()[name = string("reduce_max_7_keep_dims_0"), val = bool(true)]; tensor reduce_max_7_cast_fp16 = reduce_max(axes = reduce_max_7_axes_0, keep_dims = reduce_max_7_keep_dims_0, x = x_217_cast_fp16)[name = string("reduce_max_7_cast_fp16")]; tensor x_219_cast_fp16 = sub(x = x_217_cast_fp16, y = reduce_max_7_cast_fp16)[name = string("x_219_cast_fp16")]; tensor exp_x_cast_fp16 = exp(x = x_219_cast_fp16)[name = string("exp_x_cast_fp16")]; tensor var_1696_axes_0 = const()[name = string("op_1696_axes_0"), val = tensor([-1])]; bool var_1696_keep_dims_0 = const()[name = string("op_1696_keep_dims_0"), val = bool(true)]; tensor var_1696_cast_fp16 = reduce_sum(axes = var_1696_axes_0, keep_dims = var_1696_keep_dims_0, x = exp_x_cast_fp16)[name = string("op_1696_cast_fp16")]; tensor attn_weights_cast_fp16 = real_div(x = exp_x_cast_fp16, y = var_1696_cast_fp16)[name = string("attn_weights_cast_fp16")]; bool attn_output_43_transpose_x_0 = const()[name = string("attn_output_43_transpose_x_0"), val = bool(false)]; bool attn_output_43_transpose_y_0 = const()[name = string("attn_output_43_transpose_y_0"), val = bool(false)]; tensor attn_output_43_cast_fp16 = matmul(transpose_x = attn_output_43_transpose_x_0, transpose_y = attn_output_43_transpose_y_0, x = attn_weights_cast_fp16, y = value_states_cast_fp16)[name = string("attn_output_43_cast_fp16")]; tensor var_1699_perm_0 = const()[name = string("op_1699_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1701 = const()[name = string("op_1701"), val = tensor([1, 1, 2048])]; tensor var_1699_cast_fp16 = transpose(perm = var_1699_perm_0, x = attn_output_43_cast_fp16)[name = string("transpose_2")]; tensor input_103_cast_fp16 = reshape(shape = var_1701, x = var_1699_cast_fp16)[name = string("input_103_cast_fp16")]; tensor model_model_layers_7_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275546176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(277643392))))[name = string("model_model_layers_7_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_7_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_7_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_103_cast_fp16)[name = string("linear_7_cast_fp16")]; tensor hidden_states_61_cast_fp16 = add(x = hidden_states_57_cast_fp16, y = linear_7_cast_fp16)[name = string("hidden_states_61_cast_fp16")]; tensor mean_axes_0 = const()[name = string("mean_axes_0"), val = tensor([-1])]; bool mean_keep_dims_0 = const()[name = string("mean_keep_dims_0"), val = bool(true)]; tensor mean_cast_fp16 = reduce_mean(axes = mean_axes_0, keep_dims = mean_keep_dims_0, x = hidden_states_61_cast_fp16)[name = string("mean_cast_fp16")]; tensor input_105_cast_fp16 = sub(x = hidden_states_61_cast_fp16, y = mean_cast_fp16)[name = string("input_105_cast_fp16")]; tensor var_1712_axes_0 = const()[name = string("op_1712_axes_0"), val = tensor([-1])]; tensor model_model_layers_7_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_7_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(277651648)))]; tensor var_1712_cast_fp16 = layer_norm(axes = var_1712_axes_0, epsilon = var_44_to_fp16, gamma = model_model_layers_7_post_attention_layernorm_weight_to_fp16, x = input_105_cast_fp16)[name = string("op_1712_cast_fp16")]; tensor var_1719 = const()[name = string("op_1719"), val = tensor([0, 2, 1])]; tensor input_107_axes_0 = const()[name = string("input_107_axes_0"), val = tensor([2])]; tensor var_1720 = transpose(perm = var_1719, x = var_1712_cast_fp16)[name = string("transpose_1")]; tensor input_107 = expand_dims(axes = input_107_axes_0, x = var_1720)[name = string("input_107")]; string input_109_pad_type_0 = const()[name = string("input_109_pad_type_0"), val = string("valid")]; tensor input_109_strides_0 = const()[name = string("input_109_strides_0"), val = tensor([1, 1])]; tensor input_109_pad_0 = const()[name = string("input_109_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_109_dilations_0 = const()[name = string("input_109_dilations_0"), val = tensor([1, 1])]; int32 input_109_groups_0 = const()[name = string("input_109_groups_0"), val = int32(1)]; tensor input_109 = conv(dilations = input_109_dilations_0, groups = input_109_groups_0, pad = input_109_pad_0, pad_type = input_109_pad_type_0, strides = input_109_strides_0, weight = model_model_layers_7_mlp_gate_proj_weight_palettized, x = input_107)[name = string("input_109")]; string up_states_pad_type_0 = const()[name = string("up_states_pad_type_0"), val = string("valid")]; tensor up_states_strides_0 = const()[name = string("up_states_strides_0"), val = tensor([1, 1])]; tensor up_states_pad_0 = const()[name = string("up_states_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_dilations_0 = const()[name = string("up_states_dilations_0"), val = tensor([1, 1])]; int32 up_states_groups_0 = const()[name = string("up_states_groups_0"), val = int32(1)]; tensor up_states = conv(dilations = up_states_dilations_0, groups = up_states_groups_0, pad = up_states_pad_0, pad_type = up_states_pad_type_0, strides = up_states_strides_0, weight = model_model_layers_7_mlp_up_proj_weight_palettized, x = input_107)[name = string("up_states")]; tensor gate_states = silu(x = input_109)[name = string("gate_states")]; tensor input = mul(x = gate_states, y = up_states)[name = string("input")]; string hidden_states_pad_type_0 = const()[name = string("hidden_states_pad_type_0"), val = string("valid")]; tensor hidden_states_strides_0 = const()[name = string("hidden_states_strides_0"), val = tensor([1, 1])]; tensor hidden_states_pad_0 = const()[name = string("hidden_states_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_dilations_0 = const()[name = string("hidden_states_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_groups_0 = const()[name = string("hidden_states_groups_0"), val = int32(1)]; tensor hidden_states_1 = conv(dilations = hidden_states_dilations_0, groups = hidden_states_groups_0, pad = hidden_states_pad_0, pad_type = hidden_states_pad_type_0, strides = hidden_states_strides_0, weight = model_model_layers_7_mlp_down_proj_weight_palettized, x = input)[name = string("hidden_states")]; tensor var_1742_axes_0 = const()[name = string("op_1742_axes_0"), val = tensor([2])]; tensor var_1742 = squeeze(axes = var_1742_axes_0, x = hidden_states_1)[name = string("op_1742")]; tensor var_1743 = const()[name = string("op_1743"), val = tensor([0, 2, 1])]; tensor var_1744 = transpose(perm = var_1743, x = var_1742)[name = string("transpose_0")]; tensor output_hidden_states = add(x = hidden_states_61_cast_fp16, y = var_1744)[name = string("op_1745_cast_fp16")]; tensor position_ids_tmp = identity(x = position_ids)[name = string("position_ids_tmp")]; } -> (output_hidden_states); func prefill(tensor causal_mask, tensor current_pos, tensor hidden_states, state> model_model_kv_cache_0, tensor position_ids) { tensor model_model_layers_0_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2097280))))[name = string("model_model_layers_0_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_0_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2105536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2629888))))[name = string("model_model_layers_0_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_0_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2632000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3156352))))[name = string("model_model_layers_0_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_0_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3158464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11547136))))[name = string("model_model_layers_0_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_0_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11579968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19968640))))[name = string("model_model_layers_0_mlp_up_proj_weight_palettized")]; tensor model_model_layers_0_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20001472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28390144))))[name = string("model_model_layers_0_mlp_down_proj_weight_palettized")]; tensor model_model_layers_1_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28398400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30495616))))[name = string("model_model_layers_1_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_1_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30503872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31028224))))[name = string("model_model_layers_1_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_1_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31030336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31554688))))[name = string("model_model_layers_1_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_1_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31556800))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39945472))))[name = string("model_model_layers_1_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_1_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39978304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48366976))))[name = string("model_model_layers_1_mlp_up_proj_weight_palettized")]; tensor model_model_layers_1_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48399808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56788480))))[name = string("model_model_layers_1_mlp_down_proj_weight_palettized")]; tensor model_model_layers_2_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56796736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58893952))))[name = string("model_model_layers_2_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_2_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58902208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59426560))))[name = string("model_model_layers_2_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_2_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59428672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59953024))))[name = string("model_model_layers_2_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_2_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59955136))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68343808))))[name = string("model_model_layers_2_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_2_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68376640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76765312))))[name = string("model_model_layers_2_mlp_up_proj_weight_palettized")]; tensor model_model_layers_2_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76798144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85186816))))[name = string("model_model_layers_2_mlp_down_proj_weight_palettized")]; tensor model_model_layers_3_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85195072))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87292288))))[name = string("model_model_layers_3_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_3_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87300544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87824896))))[name = string("model_model_layers_3_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_3_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87827008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88351360))))[name = string("model_model_layers_3_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_3_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88353472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96742144))))[name = string("model_model_layers_3_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_3_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96774976))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105163648))))[name = string("model_model_layers_3_mlp_up_proj_weight_palettized")]; tensor model_model_layers_3_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105196480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113585152))))[name = string("model_model_layers_3_mlp_down_proj_weight_palettized")]; tensor model_model_layers_4_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113593408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115690624))))[name = string("model_model_layers_4_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_4_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115698880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116223232))))[name = string("model_model_layers_4_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_4_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116225344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116749696))))[name = string("model_model_layers_4_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_4_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116751808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125140480))))[name = string("model_model_layers_4_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_4_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125173312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133561984))))[name = string("model_model_layers_4_mlp_up_proj_weight_palettized")]; tensor model_model_layers_4_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133594816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141983488))))[name = string("model_model_layers_4_mlp_down_proj_weight_palettized")]; tensor model_model_layers_5_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141991744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144088960))))[name = string("model_model_layers_5_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_5_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144097216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144621568))))[name = string("model_model_layers_5_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_5_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144623680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145148032))))[name = string("model_model_layers_5_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_5_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145150144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153538816))))[name = string("model_model_layers_5_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_5_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153571648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161960320))))[name = string("model_model_layers_5_mlp_up_proj_weight_palettized")]; tensor model_model_layers_5_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161993152))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170381824))))[name = string("model_model_layers_5_mlp_down_proj_weight_palettized")]; tensor model_model_layers_6_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170390080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(172487296))))[name = string("model_model_layers_6_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_6_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(172495552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173019904))))[name = string("model_model_layers_6_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_6_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173022016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173546368))))[name = string("model_model_layers_6_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_6_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173548480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(181937152))))[name = string("model_model_layers_6_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_6_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(181969984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190358656))))[name = string("model_model_layers_6_mlp_up_proj_weight_palettized")]; tensor model_model_layers_6_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190391488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198780160))))[name = string("model_model_layers_6_mlp_down_proj_weight_palettized")]; tensor model_model_layers_7_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198788416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200885632))))[name = string("model_model_layers_7_self_attn_q_proj_weight_palettized")]; tensor model_model_layers_7_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200893888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201418240))))[name = string("model_model_layers_7_self_attn_k_proj_weight_palettized")]; tensor model_model_layers_7_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201420352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201944704))))[name = string("model_model_layers_7_self_attn_v_proj_weight_palettized")]; tensor model_model_layers_7_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201946816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210335488))))[name = string("model_model_layers_7_mlp_gate_proj_weight_palettized")]; tensor model_model_layers_7_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210368320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218756992))))[name = string("model_model_layers_7_mlp_up_proj_weight_palettized")]; tensor model_model_layers_7_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218789824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227178496))))[name = string("model_model_layers_7_mlp_down_proj_weight_palettized")]; int32 var_44 = const()[name = string("op_44"), val = int32(-1)]; int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; tensor greater_equal_0 = greater_equal(x = position_ids, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(131072)]; tensor add_0 = add(x = position_ids, y = slice_by_index_0)[name = string("add_0")]; tensor select_0 = select(a = position_ids, b = add_0, cond = greater_equal_0)[name = string("select_0")]; int32 var_261_axis_0 = const()[name = string("op_261_axis_0"), val = int32(1)]; int32 var_261_batch_dims_0 = const()[name = string("op_261_batch_dims_0"), val = int32(0)]; bool var_261_validate_indices_0 = const()[name = string("op_261_validate_indices_0"), val = bool(false)]; tensor var_55_to_fp16 = const()[name = string("op_55_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243964032)))]; tensor var_261_cast_fp16 = gather(axis = var_261_axis_0, batch_dims = var_261_batch_dims_0, indices = select_0, validate_indices = var_261_validate_indices_0, x = var_55_to_fp16)[name = string("op_261_cast_fp16")]; tensor var_262 = const()[name = string("op_262"), val = tensor([1, 64, 1, 64])]; tensor cos_1_cast_fp16 = reshape(shape = var_262, x = var_261_cast_fp16)[name = string("cos_1_cast_fp16")]; int32 var_266_axis_0 = const()[name = string("op_266_axis_0"), val = int32(1)]; int32 var_266_batch_dims_0 = const()[name = string("op_266_batch_dims_0"), val = int32(0)]; bool var_266_validate_indices_0 = const()[name = string("op_266_validate_indices_0"), val = bool(false)]; tensor var_50_to_fp16 = const()[name = string("op_50_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227186752)))]; tensor var_266_cast_fp16 = gather(axis = var_266_axis_0, batch_dims = var_266_batch_dims_0, indices = select_0, validate_indices = var_266_validate_indices_0, x = var_50_to_fp16)[name = string("op_266_cast_fp16")]; tensor var_267 = const()[name = string("op_267"), val = tensor([1, 64, 1, 64])]; tensor sin_1_cast_fp16 = reshape(shape = var_267, x = var_266_cast_fp16)[name = string("sin_1_cast_fp16")]; tensor mean_1_axes_0 = const()[name = string("mean_1_axes_0"), val = tensor([-1])]; bool mean_1_keep_dims_0 = const()[name = string("mean_1_keep_dims_0"), val = bool(true)]; tensor mean_1_cast_fp16 = reduce_mean(axes = mean_1_axes_0, keep_dims = mean_1_keep_dims_0, x = hidden_states)[name = string("mean_1_cast_fp16")]; tensor input_1_cast_fp16 = sub(x = hidden_states, y = mean_1_cast_fp16)[name = string("input_1_cast_fp16")]; tensor var_277_axes_0 = const()[name = string("op_277_axes_0"), val = tensor([-1])]; tensor model_model_layers_0_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_0_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(260741312)))]; fp16 var_46_to_fp16 = const()[name = string("op_46_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_277_cast_fp16 = layer_norm(axes = var_277_axes_0, epsilon = var_46_to_fp16, gamma = model_model_layers_0_input_layernorm_weight_to_fp16, x = input_1_cast_fp16)[name = string("op_277_cast_fp16")]; tensor var_281 = const()[name = string("op_281"), val = tensor([0, 2, 1])]; tensor var_283_axes_0 = const()[name = string("op_283_axes_0"), val = tensor([2])]; tensor var_282 = transpose(perm = var_281, x = var_277_cast_fp16)[name = string("transpose_57")]; tensor var_283 = expand_dims(axes = var_283_axes_0, x = var_282)[name = string("op_283")]; string query_states_1_pad_type_0 = const()[name = string("query_states_1_pad_type_0"), val = string("valid")]; tensor query_states_1_strides_0 = const()[name = string("query_states_1_strides_0"), val = tensor([1, 1])]; tensor query_states_1_pad_0 = const()[name = string("query_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_1_dilations_0 = const()[name = string("query_states_1_dilations_0"), val = tensor([1, 1])]; int32 query_states_1_groups_0 = const()[name = string("query_states_1_groups_0"), val = int32(1)]; tensor query_states_1 = conv(dilations = query_states_1_dilations_0, groups = query_states_1_groups_0, pad = query_states_1_pad_0, pad_type = query_states_1_pad_type_0, strides = query_states_1_strides_0, weight = model_model_layers_0_self_attn_q_proj_weight_palettized, x = var_283)[name = string("query_states_1")]; string key_states_1_pad_type_0 = const()[name = string("key_states_1_pad_type_0"), val = string("valid")]; tensor key_states_1_strides_0 = const()[name = string("key_states_1_strides_0"), val = tensor([1, 1])]; tensor key_states_1_pad_0 = const()[name = string("key_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_1_dilations_0 = const()[name = string("key_states_1_dilations_0"), val = tensor([1, 1])]; int32 key_states_1_groups_0 = const()[name = string("key_states_1_groups_0"), val = int32(1)]; tensor key_states_1 = conv(dilations = key_states_1_dilations_0, groups = key_states_1_groups_0, pad = key_states_1_pad_0, pad_type = key_states_1_pad_type_0, strides = key_states_1_strides_0, weight = model_model_layers_0_self_attn_k_proj_weight_palettized, x = var_283)[name = string("key_states_1")]; string value_states_1_pad_type_0 = const()[name = string("value_states_1_pad_type_0"), val = string("valid")]; tensor value_states_1_strides_0 = const()[name = string("value_states_1_strides_0"), val = tensor([1, 1])]; tensor value_states_1_pad_0 = const()[name = string("value_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_1_dilations_0 = const()[name = string("value_states_1_dilations_0"), val = tensor([1, 1])]; int32 value_states_1_groups_0 = const()[name = string("value_states_1_groups_0"), val = int32(1)]; tensor value_states_1 = conv(dilations = value_states_1_dilations_0, groups = value_states_1_groups_0, pad = value_states_1_pad_0, pad_type = value_states_1_pad_type_0, strides = value_states_1_strides_0, weight = model_model_layers_0_self_attn_v_proj_weight_palettized, x = var_283)[name = string("value_states_1")]; tensor var_303 = const()[name = string("op_303"), val = tensor([1, 32, 64, 64])]; tensor var_304 = reshape(shape = var_303, x = query_states_1)[name = string("op_304")]; tensor var_305 = const()[name = string("op_305"), val = tensor([0, 1, 3, 2])]; tensor var_307 = const()[name = string("op_307"), val = tensor([1, 8, 64, 64])]; tensor var_308 = reshape(shape = var_307, x = key_states_1)[name = string("op_308")]; tensor var_309 = const()[name = string("op_309"), val = tensor([0, 1, 3, 2])]; tensor var_311 = const()[name = string("op_311"), val = tensor([1, 8, 64, 64])]; tensor var_312 = reshape(shape = var_311, x = value_states_1)[name = string("op_312")]; tensor var_313 = const()[name = string("op_313"), val = tensor([0, 1, 3, 2])]; tensor var_315 = const()[name = string("op_315"), val = tensor([0, 2, 1, 3])]; tensor var_317 = const()[name = string("op_317"), val = tensor([0, 2, 1, 3])]; tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 32, 64, 32])]; tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_1 = transpose(perm = var_305, x = var_304)[name = string("transpose_56")]; tensor x1_1 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = x_1)[name = string("x1_1")]; tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 32, 64, 64])]; tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_1 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = x_1)[name = string("x2_1")]; tensor cos_7_begin_0 = const()[name = string("cos_7_begin_0"), val = tensor([0, 0, 0, 0])]; tensor cos_7_end_0 = const()[name = string("cos_7_end_0"), val = tensor([1, 1, 64, 32])]; tensor cos_7_end_mask_0 = const()[name = string("cos_7_end_mask_0"), val = tensor([true, true, true, false])]; tensor cos_5 = transpose(perm = var_315, x = cos_1_cast_fp16)[name = string("transpose_55")]; tensor cos_7 = slice_by_index(begin = cos_7_begin_0, end = cos_7_end_0, end_mask = cos_7_end_mask_0, x = cos_5)[name = string("cos_7")]; tensor sin_7_begin_0 = const()[name = string("sin_7_begin_0"), val = tensor([0, 0, 0, 0])]; tensor sin_7_end_0 = const()[name = string("sin_7_end_0"), val = tensor([1, 1, 64, 32])]; tensor sin_7_end_mask_0 = const()[name = string("sin_7_end_mask_0"), val = tensor([true, true, true, false])]; tensor sin_5 = transpose(perm = var_317, x = sin_1_cast_fp16)[name = string("transpose_54")]; tensor sin_7 = slice_by_index(begin = sin_7_begin_0, end = sin_7_end_0, end_mask = sin_7_end_mask_0, x = sin_5)[name = string("sin_7")]; tensor var_331 = mul(x = x1_1, y = cos_7)[name = string("op_331")]; tensor var_332 = mul(x = x2_1, y = sin_7)[name = string("op_332")]; tensor var_333 = sub(x = var_331, y = var_332)[name = string("op_333")]; tensor var_334 = mul(x = x2_1, y = cos_7)[name = string("op_334")]; tensor var_335 = mul(x = x1_1, y = sin_7)[name = string("op_335")]; tensor var_336 = add(x = var_334, y = var_335)[name = string("op_336")]; bool rotated_1_interleave_0 = const()[name = string("rotated_1_interleave_0"), val = bool(false)]; tensor rotated_1 = concat(axis = var_44, interleave = rotated_1_interleave_0, values = (var_333, var_336))[name = string("rotated_1")]; tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 8, 64, 32])]; tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_5 = transpose(perm = var_309, x = var_308)[name = string("transpose_53")]; tensor x1_3 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = x_5)[name = string("x1_3")]; tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 8, 64, 64])]; tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_3 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = x_5)[name = string("x2_3")]; tensor var_352 = mul(x = x1_3, y = cos_7)[name = string("op_352")]; tensor var_353 = mul(x = x2_3, y = sin_7)[name = string("op_353")]; tensor var_354 = sub(x = var_352, y = var_353)[name = string("op_354")]; tensor var_355 = mul(x = x2_3, y = cos_7)[name = string("op_355")]; tensor var_356 = mul(x = x1_3, y = sin_7)[name = string("op_356")]; tensor var_357 = add(x = var_355, y = var_356)[name = string("op_357")]; bool rotated_3_interleave_0 = const()[name = string("rotated_3_interleave_0"), val = bool(false)]; tensor rotated_3 = concat(axis = var_44, interleave = rotated_3_interleave_0, values = (var_354, var_357))[name = string("rotated_3")]; tensor seq_length_1 = const()[name = string("seq_length_1"), val = tensor([64])]; tensor var_366 = add(x = current_pos, y = seq_length_1)[name = string("op_366")]; tensor read_state_0 = read_state(input = model_model_kv_cache_0)[name = string("read_state_0")]; tensor expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor([0])]; tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor([0])]; tensor expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor([0])]; tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor([1])]; int32 concat_2_axis_0 = const()[name = string("concat_2_axis_0"), val = int32(0)]; bool concat_2_interleave_0 = const()[name = string("concat_2_interleave_0"), val = bool(false)]; tensor concat_2 = concat(axis = concat_2_axis_0, interleave = concat_2_interleave_0, values = (expand_dims_0, expand_dims_1, current_pos, expand_dims_3))[name = string("concat_2")]; tensor concat_3_values1_0 = const()[name = string("concat_3_values1_0"), val = tensor([0])]; tensor concat_3_values3_0 = const()[name = string("concat_3_values3_0"), val = tensor([0])]; int32 concat_3_axis_0 = const()[name = string("concat_3_axis_0"), val = int32(0)]; bool concat_3_interleave_0 = const()[name = string("concat_3_interleave_0"), val = bool(false)]; tensor concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (expand_dims_4, concat_3_values1_0, var_366, concat_3_values3_0))[name = string("concat_3")]; tensor model_model_kv_cache_0_internal_tensor_assign_1_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_1_stride_0, update = rotated_3, x = read_state_0)[name = string("model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_16_write_state")]; tensor coreml_update_state_16 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_16")]; tensor expand_dims_6 = const()[name = string("expand_dims_6"), val = tensor([16])]; tensor expand_dims_7 = const()[name = string("expand_dims_7"), val = tensor([0])]; tensor expand_dims_9 = const()[name = string("expand_dims_9"), val = tensor([0])]; tensor expand_dims_10 = const()[name = string("expand_dims_10"), val = tensor([17])]; int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)]; bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)]; tensor concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (expand_dims_6, expand_dims_7, current_pos, expand_dims_9))[name = string("concat_6")]; tensor concat_7_values1_0 = const()[name = string("concat_7_values1_0"), val = tensor([0])]; tensor concat_7_values3_0 = const()[name = string("concat_7_values3_0"), val = tensor([0])]; int32 concat_7_axis_0 = const()[name = string("concat_7_axis_0"), val = int32(0)]; bool concat_7_interleave_0 = const()[name = string("concat_7_interleave_0"), val = bool(false)]; tensor concat_7 = concat(axis = concat_7_axis_0, interleave = concat_7_interleave_0, values = (expand_dims_10, concat_7_values1_0, var_366, concat_7_values3_0))[name = string("concat_7")]; tensor model_model_kv_cache_0_internal_tensor_assign_2_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_3 = transpose(perm = var_313, x = var_312)[name = string("transpose_52")]; tensor model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_6, begin_mask = model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0, end = concat_7, end_mask = model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_2_stride_0, update = value_states_3, x = coreml_update_state_16)[name = string("model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_17_write_state")]; tensor coreml_update_state_17 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_17")]; tensor var_380_begin_0 = const()[name = string("op_380_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_380_end_0 = const()[name = string("op_380_end_0"), val = tensor([1, 8, 2048, 64])]; tensor var_380_end_mask_0 = const()[name = string("op_380_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_380_cast_fp16 = slice_by_index(begin = var_380_begin_0, end = var_380_end_0, end_mask = var_380_end_mask_0, x = coreml_update_state_17)[name = string("op_380_cast_fp16")]; tensor K_layer_cache_1_axes_0 = const()[name = string("K_layer_cache_1_axes_0"), val = tensor([0])]; tensor K_layer_cache_1_cast_fp16 = squeeze(axes = K_layer_cache_1_axes_0, x = var_380_cast_fp16)[name = string("K_layer_cache_1_cast_fp16")]; tensor var_382_begin_0 = const()[name = string("op_382_begin_0"), val = tensor([16, 0, 0, 0])]; tensor var_382_end_0 = const()[name = string("op_382_end_0"), val = tensor([17, 8, 2048, 64])]; tensor var_382_end_mask_0 = const()[name = string("op_382_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_382_cast_fp16 = slice_by_index(begin = var_382_begin_0, end = var_382_end_0, end_mask = var_382_end_mask_0, x = coreml_update_state_17)[name = string("op_382_cast_fp16")]; tensor V_layer_cache_1_axes_0 = const()[name = string("V_layer_cache_1_axes_0"), val = tensor([0])]; tensor V_layer_cache_1_cast_fp16 = squeeze(axes = V_layer_cache_1_axes_0, x = var_382_cast_fp16)[name = string("V_layer_cache_1_cast_fp16")]; tensor x_11_axes_0 = const()[name = string("x_11_axes_0"), val = tensor([1])]; tensor x_11_cast_fp16 = expand_dims(axes = x_11_axes_0, x = K_layer_cache_1_cast_fp16)[name = string("x_11_cast_fp16")]; tensor var_391 = const()[name = string("op_391"), val = tensor([1, 4, 1, 1])]; tensor x_13_cast_fp16 = tile(reps = var_391, x = x_11_cast_fp16)[name = string("x_13_cast_fp16")]; tensor var_395 = const()[name = string("op_395"), val = tensor([1, -1, 2048, 64])]; tensor var_396_cast_fp16 = reshape(shape = var_395, x = x_13_cast_fp16)[name = string("op_396_cast_fp16")]; tensor x_17_axes_0 = const()[name = string("x_17_axes_0"), val = tensor([1])]; tensor x_17_cast_fp16 = expand_dims(axes = x_17_axes_0, x = V_layer_cache_1_cast_fp16)[name = string("x_17_cast_fp16")]; tensor var_398 = const()[name = string("op_398"), val = tensor([1, 4, 1, 1])]; tensor x_19_cast_fp16 = tile(reps = var_398, x = x_17_cast_fp16)[name = string("x_19_cast_fp16")]; bool var_405_transpose_x_0 = const()[name = string("op_405_transpose_x_0"), val = bool(false)]; bool var_405_transpose_y_0 = const()[name = string("op_405_transpose_y_0"), val = bool(true)]; tensor var_405_cast_fp16 = matmul(transpose_x = var_405_transpose_x_0, transpose_y = var_405_transpose_y_0, x = rotated_1, y = var_396_cast_fp16)[name = string("op_405_cast_fp16")]; fp16 var_406_to_fp16 = const()[name = string("op_406_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_1_cast_fp16 = mul(x = var_405_cast_fp16, y = var_406_to_fp16)[name = string("attn_weights_1_cast_fp16")]; tensor x_21_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = causal_mask)[name = string("x_21_cast_fp16")]; tensor reduce_max_0_axes_0 = const()[name = string("reduce_max_0_axes_0"), val = tensor([-1])]; bool reduce_max_0_keep_dims_0 = const()[name = string("reduce_max_0_keep_dims_0"), val = bool(true)]; tensor reduce_max_0_cast_fp16 = reduce_max(axes = reduce_max_0_axes_0, keep_dims = reduce_max_0_keep_dims_0, x = x_21_cast_fp16)[name = string("reduce_max_0_cast_fp16")]; tensor x_23_cast_fp16 = sub(x = x_21_cast_fp16, y = reduce_max_0_cast_fp16)[name = string("x_23_cast_fp16")]; tensor exp_x_1_cast_fp16 = exp(x = x_23_cast_fp16)[name = string("exp_x_1_cast_fp16")]; tensor var_417_axes_0 = const()[name = string("op_417_axes_0"), val = tensor([-1])]; bool var_417_keep_dims_0 = const()[name = string("op_417_keep_dims_0"), val = bool(true)]; tensor var_417_cast_fp16 = reduce_sum(axes = var_417_axes_0, keep_dims = var_417_keep_dims_0, x = exp_x_1_cast_fp16)[name = string("op_417_cast_fp16")]; tensor var_418_cast_fp16 = real_div(x = exp_x_1_cast_fp16, y = var_417_cast_fp16)[name = string("op_418_cast_fp16")]; tensor concat_12 = const()[name = string("concat_12"), val = tensor([32, 64, 2048])]; tensor reshape_0_cast_fp16 = reshape(shape = concat_12, x = var_418_cast_fp16)[name = string("reshape_0_cast_fp16")]; tensor concat_13 = const()[name = string("concat_13"), val = tensor([32, 2048, 64])]; tensor reshape_1_cast_fp16 = reshape(shape = concat_13, x = x_19_cast_fp16)[name = string("reshape_1_cast_fp16")]; bool matmul_0_transpose_x_0 = const()[name = string("matmul_0_transpose_x_0"), val = bool(false)]; bool matmul_0_transpose_y_0 = const()[name = string("matmul_0_transpose_y_0"), val = bool(false)]; tensor matmul_0_cast_fp16 = matmul(transpose_x = matmul_0_transpose_x_0, transpose_y = matmul_0_transpose_y_0, x = reshape_0_cast_fp16, y = reshape_1_cast_fp16)[name = string("matmul_0_cast_fp16")]; tensor concat_17 = const()[name = string("concat_17"), val = tensor([1, 32, 64, 64])]; tensor reshape_2_cast_fp16 = reshape(shape = concat_17, x = matmul_0_cast_fp16)[name = string("reshape_2_cast_fp16")]; tensor var_421_perm_0 = const()[name = string("op_421_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_423 = const()[name = string("op_423"), val = tensor([1, 64, 2048])]; tensor var_421_cast_fp16 = transpose(perm = var_421_perm_0, x = reshape_2_cast_fp16)[name = string("transpose_51")]; tensor input_5_cast_fp16 = reshape(shape = var_423, x = var_421_cast_fp16)[name = string("input_5_cast_fp16")]; tensor model_model_layers_0_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(260745472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262842688))))[name = string("model_model_layers_0_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262850944)))]; tensor linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_0_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_5_cast_fp16)[name = string("linear_0_cast_fp16")]; tensor hidden_states_5_cast_fp16 = add(x = hidden_states, y = linear_0_cast_fp16)[name = string("hidden_states_5_cast_fp16")]; tensor mean_3_axes_0 = const()[name = string("mean_3_axes_0"), val = tensor([-1])]; bool mean_3_keep_dims_0 = const()[name = string("mean_3_keep_dims_0"), val = bool(true)]; tensor mean_3_cast_fp16 = reduce_mean(axes = mean_3_axes_0, keep_dims = mean_3_keep_dims_0, x = hidden_states_5_cast_fp16)[name = string("mean_3_cast_fp16")]; tensor input_7_cast_fp16 = sub(x = hidden_states_5_cast_fp16, y = mean_3_cast_fp16)[name = string("input_7_cast_fp16")]; tensor var_434_axes_0 = const()[name = string("op_434_axes_0"), val = tensor([-1])]; tensor model_model_layers_0_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_0_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262855104)))]; tensor var_434_cast_fp16 = layer_norm(axes = var_434_axes_0, epsilon = var_46_to_fp16, gamma = model_model_layers_0_post_attention_layernorm_weight_to_fp16, x = input_7_cast_fp16)[name = string("op_434_cast_fp16")]; tensor var_441 = const()[name = string("op_441"), val = tensor([0, 2, 1])]; tensor input_9_axes_0 = const()[name = string("input_9_axes_0"), val = tensor([2])]; tensor var_442 = transpose(perm = var_441, x = var_434_cast_fp16)[name = string("transpose_50")]; tensor input_9 = expand_dims(axes = input_9_axes_0, x = var_442)[name = string("input_9")]; string input_11_pad_type_0 = const()[name = string("input_11_pad_type_0"), val = string("valid")]; tensor input_11_strides_0 = const()[name = string("input_11_strides_0"), val = tensor([1, 1])]; tensor input_11_pad_0 = const()[name = string("input_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_11_dilations_0 = const()[name = string("input_11_dilations_0"), val = tensor([1, 1])]; int32 input_11_groups_0 = const()[name = string("input_11_groups_0"), val = int32(1)]; tensor input_11 = conv(dilations = input_11_dilations_0, groups = input_11_groups_0, pad = input_11_pad_0, pad_type = input_11_pad_type_0, strides = input_11_strides_0, weight = model_model_layers_0_mlp_gate_proj_weight_palettized, x = input_9)[name = string("input_11")]; string up_states_1_pad_type_0 = const()[name = string("up_states_1_pad_type_0"), val = string("valid")]; tensor up_states_1_strides_0 = const()[name = string("up_states_1_strides_0"), val = tensor([1, 1])]; tensor up_states_1_pad_0 = const()[name = string("up_states_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_1_dilations_0 = const()[name = string("up_states_1_dilations_0"), val = tensor([1, 1])]; int32 up_states_1_groups_0 = const()[name = string("up_states_1_groups_0"), val = int32(1)]; tensor up_states_1 = conv(dilations = up_states_1_dilations_0, groups = up_states_1_groups_0, pad = up_states_1_pad_0, pad_type = up_states_1_pad_type_0, strides = up_states_1_strides_0, weight = model_model_layers_0_mlp_up_proj_weight_palettized, x = input_9)[name = string("up_states_1")]; tensor gate_states_1 = silu(x = input_11)[name = string("gate_states_1")]; tensor input_13 = mul(x = gate_states_1, y = up_states_1)[name = string("input_13")]; string hidden_states_7_pad_type_0 = const()[name = string("hidden_states_7_pad_type_0"), val = string("valid")]; tensor hidden_states_7_strides_0 = const()[name = string("hidden_states_7_strides_0"), val = tensor([1, 1])]; tensor hidden_states_7_pad_0 = const()[name = string("hidden_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_7_dilations_0 = const()[name = string("hidden_states_7_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_7_groups_0 = const()[name = string("hidden_states_7_groups_0"), val = int32(1)]; tensor hidden_states_7 = conv(dilations = hidden_states_7_dilations_0, groups = hidden_states_7_groups_0, pad = hidden_states_7_pad_0, pad_type = hidden_states_7_pad_type_0, strides = hidden_states_7_strides_0, weight = model_model_layers_0_mlp_down_proj_weight_palettized, x = input_13)[name = string("hidden_states_7")]; tensor var_464_axes_0 = const()[name = string("op_464_axes_0"), val = tensor([2])]; tensor var_464 = squeeze(axes = var_464_axes_0, x = hidden_states_7)[name = string("op_464")]; tensor var_465 = const()[name = string("op_465"), val = tensor([0, 2, 1])]; tensor var_466 = transpose(perm = var_465, x = var_464)[name = string("transpose_49")]; tensor hidden_states_9_cast_fp16 = add(x = hidden_states_5_cast_fp16, y = var_466)[name = string("hidden_states_9_cast_fp16")]; tensor mean_5_axes_0 = const()[name = string("mean_5_axes_0"), val = tensor([-1])]; bool mean_5_keep_dims_0 = const()[name = string("mean_5_keep_dims_0"), val = bool(true)]; tensor mean_5_cast_fp16 = reduce_mean(axes = mean_5_axes_0, keep_dims = mean_5_keep_dims_0, x = hidden_states_9_cast_fp16)[name = string("mean_5_cast_fp16")]; tensor input_15_cast_fp16 = sub(x = hidden_states_9_cast_fp16, y = mean_5_cast_fp16)[name = string("input_15_cast_fp16")]; tensor var_474_axes_0 = const()[name = string("op_474_axes_0"), val = tensor([-1])]; tensor model_model_layers_1_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_1_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262859264)))]; tensor var_474_cast_fp16 = layer_norm(axes = var_474_axes_0, epsilon = var_46_to_fp16, gamma = model_model_layers_1_input_layernorm_weight_to_fp16, x = input_15_cast_fp16)[name = string("op_474_cast_fp16")]; tensor var_478 = const()[name = string("op_478"), val = tensor([0, 2, 1])]; tensor var_480_axes_0 = const()[name = string("op_480_axes_0"), val = tensor([2])]; tensor var_479 = transpose(perm = var_478, x = var_474_cast_fp16)[name = string("transpose_48")]; tensor var_480 = expand_dims(axes = var_480_axes_0, x = var_479)[name = string("op_480")]; string query_states_5_pad_type_0 = const()[name = string("query_states_5_pad_type_0"), val = string("valid")]; tensor query_states_5_strides_0 = const()[name = string("query_states_5_strides_0"), val = tensor([1, 1])]; tensor query_states_5_pad_0 = const()[name = string("query_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_5_dilations_0 = const()[name = string("query_states_5_dilations_0"), val = tensor([1, 1])]; int32 query_states_5_groups_0 = const()[name = string("query_states_5_groups_0"), val = int32(1)]; tensor query_states_5 = conv(dilations = query_states_5_dilations_0, groups = query_states_5_groups_0, pad = query_states_5_pad_0, pad_type = query_states_5_pad_type_0, strides = query_states_5_strides_0, weight = model_model_layers_1_self_attn_q_proj_weight_palettized, x = var_480)[name = string("query_states_5")]; string key_states_7_pad_type_0 = const()[name = string("key_states_7_pad_type_0"), val = string("valid")]; tensor key_states_7_strides_0 = const()[name = string("key_states_7_strides_0"), val = tensor([1, 1])]; tensor key_states_7_pad_0 = const()[name = string("key_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_7_dilations_0 = const()[name = string("key_states_7_dilations_0"), val = tensor([1, 1])]; int32 key_states_7_groups_0 = const()[name = string("key_states_7_groups_0"), val = int32(1)]; tensor key_states_7 = conv(dilations = key_states_7_dilations_0, groups = key_states_7_groups_0, pad = key_states_7_pad_0, pad_type = key_states_7_pad_type_0, strides = key_states_7_strides_0, weight = model_model_layers_1_self_attn_k_proj_weight_palettized, x = var_480)[name = string("key_states_7")]; string value_states_7_pad_type_0 = const()[name = string("value_states_7_pad_type_0"), val = string("valid")]; tensor value_states_7_strides_0 = const()[name = string("value_states_7_strides_0"), val = tensor([1, 1])]; tensor value_states_7_pad_0 = const()[name = string("value_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_7_dilations_0 = const()[name = string("value_states_7_dilations_0"), val = tensor([1, 1])]; int32 value_states_7_groups_0 = const()[name = string("value_states_7_groups_0"), val = int32(1)]; tensor value_states_7 = conv(dilations = value_states_7_dilations_0, groups = value_states_7_groups_0, pad = value_states_7_pad_0, pad_type = value_states_7_pad_type_0, strides = value_states_7_strides_0, weight = model_model_layers_1_self_attn_v_proj_weight_palettized, x = var_480)[name = string("value_states_7")]; tensor var_500 = const()[name = string("op_500"), val = tensor([1, 32, 64, 64])]; tensor var_501 = reshape(shape = var_500, x = query_states_5)[name = string("op_501")]; tensor var_502 = const()[name = string("op_502"), val = tensor([0, 1, 3, 2])]; tensor var_504 = const()[name = string("op_504"), val = tensor([1, 8, 64, 64])]; tensor var_505 = reshape(shape = var_504, x = key_states_7)[name = string("op_505")]; tensor var_506 = const()[name = string("op_506"), val = tensor([0, 1, 3, 2])]; tensor var_508 = const()[name = string("op_508"), val = tensor([1, 8, 64, 64])]; tensor var_509 = reshape(shape = var_508, x = value_states_7)[name = string("op_509")]; tensor var_510 = const()[name = string("op_510"), val = tensor([0, 1, 3, 2])]; tensor x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor([1, 32, 64, 32])]; tensor x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_29 = transpose(perm = var_502, x = var_501)[name = string("transpose_47")]; tensor x1_5 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = x_29)[name = string("x1_5")]; tensor x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor([1, 32, 64, 64])]; tensor x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_5 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = x_29)[name = string("x2_5")]; tensor var_528 = mul(x = x1_5, y = cos_7)[name = string("op_528")]; tensor var_529 = mul(x = x2_5, y = sin_7)[name = string("op_529")]; tensor var_530 = sub(x = var_528, y = var_529)[name = string("op_530")]; tensor var_531 = mul(x = x2_5, y = cos_7)[name = string("op_531")]; tensor var_532 = mul(x = x1_5, y = sin_7)[name = string("op_532")]; tensor var_533 = add(x = var_531, y = var_532)[name = string("op_533")]; bool rotated_5_interleave_0 = const()[name = string("rotated_5_interleave_0"), val = bool(false)]; tensor rotated_5 = concat(axis = var_44, interleave = rotated_5_interleave_0, values = (var_530, var_533))[name = string("rotated_5")]; tensor x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor([1, 8, 64, 32])]; tensor x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_33 = transpose(perm = var_506, x = var_505)[name = string("transpose_46")]; tensor x1_7 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = x_33)[name = string("x1_7")]; tensor x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor([1, 8, 64, 64])]; tensor x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_7 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = x_33)[name = string("x2_7")]; tensor var_549 = mul(x = x1_7, y = cos_7)[name = string("op_549")]; tensor var_550 = mul(x = x2_7, y = sin_7)[name = string("op_550")]; tensor var_551 = sub(x = var_549, y = var_550)[name = string("op_551")]; tensor var_552 = mul(x = x2_7, y = cos_7)[name = string("op_552")]; tensor var_553 = mul(x = x1_7, y = sin_7)[name = string("op_553")]; tensor var_554 = add(x = var_552, y = var_553)[name = string("op_554")]; bool rotated_7_interleave_0 = const()[name = string("rotated_7_interleave_0"), val = bool(false)]; tensor rotated_7 = concat(axis = var_44, interleave = rotated_7_interleave_0, values = (var_551, var_554))[name = string("rotated_7")]; tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([1])]; tensor expand_dims_13 = const()[name = string("expand_dims_13"), val = tensor([0])]; tensor expand_dims_15 = const()[name = string("expand_dims_15"), val = tensor([0])]; tensor expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor([2])]; int32 concat_20_axis_0 = const()[name = string("concat_20_axis_0"), val = int32(0)]; bool concat_20_interleave_0 = const()[name = string("concat_20_interleave_0"), val = bool(false)]; tensor concat_20 = concat(axis = concat_20_axis_0, interleave = concat_20_interleave_0, values = (expand_dims_12, expand_dims_13, current_pos, expand_dims_15))[name = string("concat_20")]; tensor concat_21_values1_0 = const()[name = string("concat_21_values1_0"), val = tensor([0])]; tensor concat_21_values3_0 = const()[name = string("concat_21_values3_0"), val = tensor([0])]; int32 concat_21_axis_0 = const()[name = string("concat_21_axis_0"), val = int32(0)]; bool concat_21_interleave_0 = const()[name = string("concat_21_interleave_0"), val = bool(false)]; tensor concat_21 = concat(axis = concat_21_axis_0, interleave = concat_21_interleave_0, values = (expand_dims_16, concat_21_values1_0, var_366, concat_21_values3_0))[name = string("concat_21")]; tensor model_model_kv_cache_0_internal_tensor_assign_3_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_20, begin_mask = model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0, end = concat_21, end_mask = model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_3_stride_0, update = rotated_7, x = coreml_update_state_17)[name = string("model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_18_write_state")]; tensor coreml_update_state_18 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_18")]; tensor expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor([17])]; tensor expand_dims_19 = const()[name = string("expand_dims_19"), val = tensor([0])]; tensor expand_dims_21 = const()[name = string("expand_dims_21"), val = tensor([0])]; tensor expand_dims_22 = const()[name = string("expand_dims_22"), val = tensor([18])]; int32 concat_24_axis_0 = const()[name = string("concat_24_axis_0"), val = int32(0)]; bool concat_24_interleave_0 = const()[name = string("concat_24_interleave_0"), val = bool(false)]; tensor concat_24 = concat(axis = concat_24_axis_0, interleave = concat_24_interleave_0, values = (expand_dims_18, expand_dims_19, current_pos, expand_dims_21))[name = string("concat_24")]; tensor concat_25_values1_0 = const()[name = string("concat_25_values1_0"), val = tensor([0])]; tensor concat_25_values3_0 = const()[name = string("concat_25_values3_0"), val = tensor([0])]; int32 concat_25_axis_0 = const()[name = string("concat_25_axis_0"), val = int32(0)]; bool concat_25_interleave_0 = const()[name = string("concat_25_interleave_0"), val = bool(false)]; tensor concat_25 = concat(axis = concat_25_axis_0, interleave = concat_25_interleave_0, values = (expand_dims_22, concat_25_values1_0, var_366, concat_25_values3_0))[name = string("concat_25")]; tensor model_model_kv_cache_0_internal_tensor_assign_4_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_9 = transpose(perm = var_510, x = var_509)[name = string("transpose_45")]; tensor model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_24, begin_mask = model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0, end = concat_25, end_mask = model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_4_stride_0, update = value_states_9, x = coreml_update_state_18)[name = string("model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_19_write_state")]; tensor coreml_update_state_19 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_19")]; tensor var_577_begin_0 = const()[name = string("op_577_begin_0"), val = tensor([1, 0, 0, 0])]; tensor var_577_end_0 = const()[name = string("op_577_end_0"), val = tensor([2, 8, 2048, 64])]; tensor var_577_end_mask_0 = const()[name = string("op_577_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_577_cast_fp16 = slice_by_index(begin = var_577_begin_0, end = var_577_end_0, end_mask = var_577_end_mask_0, x = coreml_update_state_19)[name = string("op_577_cast_fp16")]; tensor K_layer_cache_3_axes_0 = const()[name = string("K_layer_cache_3_axes_0"), val = tensor([0])]; tensor K_layer_cache_3_cast_fp16 = squeeze(axes = K_layer_cache_3_axes_0, x = var_577_cast_fp16)[name = string("K_layer_cache_3_cast_fp16")]; tensor var_579_begin_0 = const()[name = string("op_579_begin_0"), val = tensor([17, 0, 0, 0])]; tensor var_579_end_0 = const()[name = string("op_579_end_0"), val = tensor([18, 8, 2048, 64])]; tensor var_579_end_mask_0 = const()[name = string("op_579_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_579_cast_fp16 = slice_by_index(begin = var_579_begin_0, end = var_579_end_0, end_mask = var_579_end_mask_0, x = coreml_update_state_19)[name = string("op_579_cast_fp16")]; tensor V_layer_cache_3_axes_0 = const()[name = string("V_layer_cache_3_axes_0"), val = tensor([0])]; tensor V_layer_cache_3_cast_fp16 = squeeze(axes = V_layer_cache_3_axes_0, x = var_579_cast_fp16)[name = string("V_layer_cache_3_cast_fp16")]; tensor x_39_axes_0 = const()[name = string("x_39_axes_0"), val = tensor([1])]; tensor x_39_cast_fp16 = expand_dims(axes = x_39_axes_0, x = K_layer_cache_3_cast_fp16)[name = string("x_39_cast_fp16")]; tensor var_588 = const()[name = string("op_588"), val = tensor([1, 4, 1, 1])]; tensor x_41_cast_fp16 = tile(reps = var_588, x = x_39_cast_fp16)[name = string("x_41_cast_fp16")]; tensor var_592 = const()[name = string("op_592"), val = tensor([1, -1, 2048, 64])]; tensor var_593_cast_fp16 = reshape(shape = var_592, x = x_41_cast_fp16)[name = string("op_593_cast_fp16")]; tensor x_45_axes_0 = const()[name = string("x_45_axes_0"), val = tensor([1])]; tensor x_45_cast_fp16 = expand_dims(axes = x_45_axes_0, x = V_layer_cache_3_cast_fp16)[name = string("x_45_cast_fp16")]; tensor var_595 = const()[name = string("op_595"), val = tensor([1, 4, 1, 1])]; tensor x_47_cast_fp16 = tile(reps = var_595, x = x_45_cast_fp16)[name = string("x_47_cast_fp16")]; bool var_602_transpose_x_0 = const()[name = string("op_602_transpose_x_0"), val = bool(false)]; bool var_602_transpose_y_0 = const()[name = string("op_602_transpose_y_0"), val = bool(true)]; tensor var_602_cast_fp16 = matmul(transpose_x = var_602_transpose_x_0, transpose_y = var_602_transpose_y_0, x = rotated_5, y = var_593_cast_fp16)[name = string("op_602_cast_fp16")]; fp16 var_603_to_fp16 = const()[name = string("op_603_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_3_cast_fp16 = mul(x = var_602_cast_fp16, y = var_603_to_fp16)[name = string("attn_weights_3_cast_fp16")]; tensor x_49_cast_fp16 = add(x = attn_weights_3_cast_fp16, y = causal_mask)[name = string("x_49_cast_fp16")]; tensor reduce_max_1_axes_0 = const()[name = string("reduce_max_1_axes_0"), val = tensor([-1])]; bool reduce_max_1_keep_dims_0 = const()[name = string("reduce_max_1_keep_dims_0"), val = bool(true)]; tensor reduce_max_1_cast_fp16 = reduce_max(axes = reduce_max_1_axes_0, keep_dims = reduce_max_1_keep_dims_0, x = x_49_cast_fp16)[name = string("reduce_max_1_cast_fp16")]; tensor x_51_cast_fp16 = sub(x = x_49_cast_fp16, y = reduce_max_1_cast_fp16)[name = string("x_51_cast_fp16")]; tensor exp_x_3_cast_fp16 = exp(x = x_51_cast_fp16)[name = string("exp_x_3_cast_fp16")]; tensor var_614_axes_0 = const()[name = string("op_614_axes_0"), val = tensor([-1])]; bool var_614_keep_dims_0 = const()[name = string("op_614_keep_dims_0"), val = bool(true)]; tensor var_614_cast_fp16 = reduce_sum(axes = var_614_axes_0, keep_dims = var_614_keep_dims_0, x = exp_x_3_cast_fp16)[name = string("op_614_cast_fp16")]; tensor var_615_cast_fp16 = real_div(x = exp_x_3_cast_fp16, y = var_614_cast_fp16)[name = string("op_615_cast_fp16")]; tensor concat_30 = const()[name = string("concat_30"), val = tensor([32, 64, 2048])]; tensor reshape_3_cast_fp16 = reshape(shape = concat_30, x = var_615_cast_fp16)[name = string("reshape_3_cast_fp16")]; tensor concat_31 = const()[name = string("concat_31"), val = tensor([32, 2048, 64])]; tensor reshape_4_cast_fp16 = reshape(shape = concat_31, x = x_47_cast_fp16)[name = string("reshape_4_cast_fp16")]; bool matmul_1_transpose_x_0 = const()[name = string("matmul_1_transpose_x_0"), val = bool(false)]; bool matmul_1_transpose_y_0 = const()[name = string("matmul_1_transpose_y_0"), val = bool(false)]; tensor matmul_1_cast_fp16 = matmul(transpose_x = matmul_1_transpose_x_0, transpose_y = matmul_1_transpose_y_0, x = reshape_3_cast_fp16, y = reshape_4_cast_fp16)[name = string("matmul_1_cast_fp16")]; tensor concat_35 = const()[name = string("concat_35"), val = tensor([1, 32, 64, 64])]; tensor reshape_5_cast_fp16 = reshape(shape = concat_35, x = matmul_1_cast_fp16)[name = string("reshape_5_cast_fp16")]; tensor var_618_perm_0 = const()[name = string("op_618_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_620 = const()[name = string("op_620"), val = tensor([1, 64, 2048])]; tensor var_618_cast_fp16 = transpose(perm = var_618_perm_0, x = reshape_5_cast_fp16)[name = string("transpose_44")]; tensor input_19_cast_fp16 = reshape(shape = var_620, x = var_618_cast_fp16)[name = string("input_19_cast_fp16")]; tensor model_model_layers_1_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262863424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(264960640))))[name = string("model_model_layers_1_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_1_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_1_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_19_cast_fp16)[name = string("linear_1_cast_fp16")]; tensor hidden_states_13_cast_fp16 = add(x = hidden_states_9_cast_fp16, y = linear_1_cast_fp16)[name = string("hidden_states_13_cast_fp16")]; tensor mean_7_axes_0 = const()[name = string("mean_7_axes_0"), val = tensor([-1])]; bool mean_7_keep_dims_0 = const()[name = string("mean_7_keep_dims_0"), val = bool(true)]; tensor mean_7_cast_fp16 = reduce_mean(axes = mean_7_axes_0, keep_dims = mean_7_keep_dims_0, x = hidden_states_13_cast_fp16)[name = string("mean_7_cast_fp16")]; tensor input_21_cast_fp16 = sub(x = hidden_states_13_cast_fp16, y = mean_7_cast_fp16)[name = string("input_21_cast_fp16")]; tensor var_631_axes_0 = const()[name = string("op_631_axes_0"), val = tensor([-1])]; tensor model_model_layers_1_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_1_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(264968896)))]; tensor var_631_cast_fp16 = layer_norm(axes = var_631_axes_0, epsilon = var_46_to_fp16, gamma = model_model_layers_1_post_attention_layernorm_weight_to_fp16, x = input_21_cast_fp16)[name = string("op_631_cast_fp16")]; tensor var_638 = const()[name = string("op_638"), val = tensor([0, 2, 1])]; tensor input_23_axes_0 = const()[name = string("input_23_axes_0"), val = tensor([2])]; tensor var_639 = transpose(perm = var_638, x = var_631_cast_fp16)[name = string("transpose_43")]; tensor input_23 = expand_dims(axes = input_23_axes_0, x = var_639)[name = string("input_23")]; string input_25_pad_type_0 = const()[name = string("input_25_pad_type_0"), val = string("valid")]; tensor input_25_strides_0 = const()[name = string("input_25_strides_0"), val = tensor([1, 1])]; tensor input_25_pad_0 = const()[name = string("input_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_25_dilations_0 = const()[name = string("input_25_dilations_0"), val = tensor([1, 1])]; int32 input_25_groups_0 = const()[name = string("input_25_groups_0"), val = int32(1)]; tensor input_25 = conv(dilations = input_25_dilations_0, groups = input_25_groups_0, pad = input_25_pad_0, pad_type = input_25_pad_type_0, strides = input_25_strides_0, weight = model_model_layers_1_mlp_gate_proj_weight_palettized, x = input_23)[name = string("input_25")]; string up_states_3_pad_type_0 = const()[name = string("up_states_3_pad_type_0"), val = string("valid")]; tensor up_states_3_strides_0 = const()[name = string("up_states_3_strides_0"), val = tensor([1, 1])]; tensor up_states_3_pad_0 = const()[name = string("up_states_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_3_dilations_0 = const()[name = string("up_states_3_dilations_0"), val = tensor([1, 1])]; int32 up_states_3_groups_0 = const()[name = string("up_states_3_groups_0"), val = int32(1)]; tensor up_states_3 = conv(dilations = up_states_3_dilations_0, groups = up_states_3_groups_0, pad = up_states_3_pad_0, pad_type = up_states_3_pad_type_0, strides = up_states_3_strides_0, weight = model_model_layers_1_mlp_up_proj_weight_palettized, x = input_23)[name = string("up_states_3")]; tensor gate_states_3 = silu(x = input_25)[name = string("gate_states_3")]; tensor input_27 = mul(x = gate_states_3, y = up_states_3)[name = string("input_27")]; string hidden_states_15_pad_type_0 = const()[name = string("hidden_states_15_pad_type_0"), val = string("valid")]; tensor hidden_states_15_strides_0 = const()[name = string("hidden_states_15_strides_0"), val = tensor([1, 1])]; tensor hidden_states_15_pad_0 = const()[name = string("hidden_states_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_15_dilations_0 = const()[name = string("hidden_states_15_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_15_groups_0 = const()[name = string("hidden_states_15_groups_0"), val = int32(1)]; tensor hidden_states_15 = conv(dilations = hidden_states_15_dilations_0, groups = hidden_states_15_groups_0, pad = hidden_states_15_pad_0, pad_type = hidden_states_15_pad_type_0, strides = hidden_states_15_strides_0, weight = model_model_layers_1_mlp_down_proj_weight_palettized, x = input_27)[name = string("hidden_states_15")]; tensor var_661_axes_0 = const()[name = string("op_661_axes_0"), val = tensor([2])]; tensor var_661 = squeeze(axes = var_661_axes_0, x = hidden_states_15)[name = string("op_661")]; tensor var_662 = const()[name = string("op_662"), val = tensor([0, 2, 1])]; tensor var_663 = transpose(perm = var_662, x = var_661)[name = string("transpose_42")]; tensor hidden_states_17_cast_fp16 = add(x = hidden_states_13_cast_fp16, y = var_663)[name = string("hidden_states_17_cast_fp16")]; tensor mean_9_axes_0 = const()[name = string("mean_9_axes_0"), val = tensor([-1])]; bool mean_9_keep_dims_0 = const()[name = string("mean_9_keep_dims_0"), val = bool(true)]; tensor mean_9_cast_fp16 = reduce_mean(axes = mean_9_axes_0, keep_dims = mean_9_keep_dims_0, x = hidden_states_17_cast_fp16)[name = string("mean_9_cast_fp16")]; tensor input_29_cast_fp16 = sub(x = hidden_states_17_cast_fp16, y = mean_9_cast_fp16)[name = string("input_29_cast_fp16")]; tensor var_671_axes_0 = const()[name = string("op_671_axes_0"), val = tensor([-1])]; tensor model_model_layers_2_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_2_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(264973056)))]; tensor var_671_cast_fp16 = layer_norm(axes = var_671_axes_0, epsilon = var_46_to_fp16, gamma = model_model_layers_2_input_layernorm_weight_to_fp16, x = input_29_cast_fp16)[name = string("op_671_cast_fp16")]; tensor var_675 = const()[name = string("op_675"), val = tensor([0, 2, 1])]; tensor var_677_axes_0 = const()[name = string("op_677_axes_0"), val = tensor([2])]; tensor var_676 = transpose(perm = var_675, x = var_671_cast_fp16)[name = string("transpose_41")]; tensor var_677 = expand_dims(axes = var_677_axes_0, x = var_676)[name = string("op_677")]; string query_states_9_pad_type_0 = const()[name = string("query_states_9_pad_type_0"), val = string("valid")]; tensor query_states_9_strides_0 = const()[name = string("query_states_9_strides_0"), val = tensor([1, 1])]; tensor query_states_9_pad_0 = const()[name = string("query_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_9_dilations_0 = const()[name = string("query_states_9_dilations_0"), val = tensor([1, 1])]; int32 query_states_9_groups_0 = const()[name = string("query_states_9_groups_0"), val = int32(1)]; tensor query_states_9 = conv(dilations = query_states_9_dilations_0, groups = query_states_9_groups_0, pad = query_states_9_pad_0, pad_type = query_states_9_pad_type_0, strides = query_states_9_strides_0, weight = model_model_layers_2_self_attn_q_proj_weight_palettized, x = var_677)[name = string("query_states_9")]; string key_states_13_pad_type_0 = const()[name = string("key_states_13_pad_type_0"), val = string("valid")]; tensor key_states_13_strides_0 = const()[name = string("key_states_13_strides_0"), val = tensor([1, 1])]; tensor key_states_13_pad_0 = const()[name = string("key_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_13_dilations_0 = const()[name = string("key_states_13_dilations_0"), val = tensor([1, 1])]; int32 key_states_13_groups_0 = const()[name = string("key_states_13_groups_0"), val = int32(1)]; tensor key_states_13 = conv(dilations = key_states_13_dilations_0, groups = key_states_13_groups_0, pad = key_states_13_pad_0, pad_type = key_states_13_pad_type_0, strides = key_states_13_strides_0, weight = model_model_layers_2_self_attn_k_proj_weight_palettized, x = var_677)[name = string("key_states_13")]; string value_states_13_pad_type_0 = const()[name = string("value_states_13_pad_type_0"), val = string("valid")]; tensor value_states_13_strides_0 = const()[name = string("value_states_13_strides_0"), val = tensor([1, 1])]; tensor value_states_13_pad_0 = const()[name = string("value_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_13_dilations_0 = const()[name = string("value_states_13_dilations_0"), val = tensor([1, 1])]; int32 value_states_13_groups_0 = const()[name = string("value_states_13_groups_0"), val = int32(1)]; tensor value_states_13 = conv(dilations = value_states_13_dilations_0, groups = value_states_13_groups_0, pad = value_states_13_pad_0, pad_type = value_states_13_pad_type_0, strides = value_states_13_strides_0, weight = model_model_layers_2_self_attn_v_proj_weight_palettized, x = var_677)[name = string("value_states_13")]; tensor var_697 = const()[name = string("op_697"), val = tensor([1, 32, 64, 64])]; tensor var_698 = reshape(shape = var_697, x = query_states_9)[name = string("op_698")]; tensor var_699 = const()[name = string("op_699"), val = tensor([0, 1, 3, 2])]; tensor var_701 = const()[name = string("op_701"), val = tensor([1, 8, 64, 64])]; tensor var_702 = reshape(shape = var_701, x = key_states_13)[name = string("op_702")]; tensor var_703 = const()[name = string("op_703"), val = tensor([0, 1, 3, 2])]; tensor var_705 = const()[name = string("op_705"), val = tensor([1, 8, 64, 64])]; tensor var_706 = reshape(shape = var_705, x = value_states_13)[name = string("op_706")]; tensor var_707 = const()[name = string("op_707"), val = tensor([0, 1, 3, 2])]; tensor x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor([1, 32, 64, 32])]; tensor x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_57 = transpose(perm = var_699, x = var_698)[name = string("transpose_40")]; tensor x1_9 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = x_57)[name = string("x1_9")]; tensor x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor([1, 32, 64, 64])]; tensor x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_9 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = x_57)[name = string("x2_9")]; tensor var_725 = mul(x = x1_9, y = cos_7)[name = string("op_725")]; tensor var_726 = mul(x = x2_9, y = sin_7)[name = string("op_726")]; tensor var_727 = sub(x = var_725, y = var_726)[name = string("op_727")]; tensor var_728 = mul(x = x2_9, y = cos_7)[name = string("op_728")]; tensor var_729 = mul(x = x1_9, y = sin_7)[name = string("op_729")]; tensor var_730 = add(x = var_728, y = var_729)[name = string("op_730")]; bool rotated_9_interleave_0 = const()[name = string("rotated_9_interleave_0"), val = bool(false)]; tensor rotated_9 = concat(axis = var_44, interleave = rotated_9_interleave_0, values = (var_727, var_730))[name = string("rotated_9")]; tensor x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor([1, 8, 64, 32])]; tensor x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_61 = transpose(perm = var_703, x = var_702)[name = string("transpose_39")]; tensor x1_11 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = x_61)[name = string("x1_11")]; tensor x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor([1, 8, 64, 64])]; tensor x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_11 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = x_61)[name = string("x2_11")]; tensor var_746 = mul(x = x1_11, y = cos_7)[name = string("op_746")]; tensor var_747 = mul(x = x2_11, y = sin_7)[name = string("op_747")]; tensor var_748 = sub(x = var_746, y = var_747)[name = string("op_748")]; tensor var_749 = mul(x = x2_11, y = cos_7)[name = string("op_749")]; tensor var_750 = mul(x = x1_11, y = sin_7)[name = string("op_750")]; tensor var_751 = add(x = var_749, y = var_750)[name = string("op_751")]; bool rotated_11_interleave_0 = const()[name = string("rotated_11_interleave_0"), val = bool(false)]; tensor rotated_11 = concat(axis = var_44, interleave = rotated_11_interleave_0, values = (var_748, var_751))[name = string("rotated_11")]; tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([2])]; tensor expand_dims_25 = const()[name = string("expand_dims_25"), val = tensor([0])]; tensor expand_dims_27 = const()[name = string("expand_dims_27"), val = tensor([0])]; tensor expand_dims_28 = const()[name = string("expand_dims_28"), val = tensor([3])]; int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)]; bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)]; tensor concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (expand_dims_24, expand_dims_25, current_pos, expand_dims_27))[name = string("concat_38")]; tensor concat_39_values1_0 = const()[name = string("concat_39_values1_0"), val = tensor([0])]; tensor concat_39_values3_0 = const()[name = string("concat_39_values3_0"), val = tensor([0])]; int32 concat_39_axis_0 = const()[name = string("concat_39_axis_0"), val = int32(0)]; bool concat_39_interleave_0 = const()[name = string("concat_39_interleave_0"), val = bool(false)]; tensor concat_39 = concat(axis = concat_39_axis_0, interleave = concat_39_interleave_0, values = (expand_dims_28, concat_39_values1_0, var_366, concat_39_values3_0))[name = string("concat_39")]; tensor model_model_kv_cache_0_internal_tensor_assign_5_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_38, begin_mask = model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0, end = concat_39, end_mask = model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_5_stride_0, update = rotated_11, x = coreml_update_state_19)[name = string("model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_20_write_state")]; tensor coreml_update_state_20 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_20")]; tensor expand_dims_30 = const()[name = string("expand_dims_30"), val = tensor([18])]; tensor expand_dims_31 = const()[name = string("expand_dims_31"), val = tensor([0])]; tensor expand_dims_33 = const()[name = string("expand_dims_33"), val = tensor([0])]; tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([19])]; int32 concat_42_axis_0 = const()[name = string("concat_42_axis_0"), val = int32(0)]; bool concat_42_interleave_0 = const()[name = string("concat_42_interleave_0"), val = bool(false)]; tensor concat_42 = concat(axis = concat_42_axis_0, interleave = concat_42_interleave_0, values = (expand_dims_30, expand_dims_31, current_pos, expand_dims_33))[name = string("concat_42")]; tensor concat_43_values1_0 = const()[name = string("concat_43_values1_0"), val = tensor([0])]; tensor concat_43_values3_0 = const()[name = string("concat_43_values3_0"), val = tensor([0])]; int32 concat_43_axis_0 = const()[name = string("concat_43_axis_0"), val = int32(0)]; bool concat_43_interleave_0 = const()[name = string("concat_43_interleave_0"), val = bool(false)]; tensor concat_43 = concat(axis = concat_43_axis_0, interleave = concat_43_interleave_0, values = (expand_dims_34, concat_43_values1_0, var_366, concat_43_values3_0))[name = string("concat_43")]; tensor model_model_kv_cache_0_internal_tensor_assign_6_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_15 = transpose(perm = var_707, x = var_706)[name = string("transpose_38")]; tensor model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_42, begin_mask = model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0, end = concat_43, end_mask = model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_6_stride_0, update = value_states_15, x = coreml_update_state_20)[name = string("model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_21_write_state")]; tensor coreml_update_state_21 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_21")]; tensor var_774_begin_0 = const()[name = string("op_774_begin_0"), val = tensor([2, 0, 0, 0])]; tensor var_774_end_0 = const()[name = string("op_774_end_0"), val = tensor([3, 8, 2048, 64])]; tensor var_774_end_mask_0 = const()[name = string("op_774_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_774_cast_fp16 = slice_by_index(begin = var_774_begin_0, end = var_774_end_0, end_mask = var_774_end_mask_0, x = coreml_update_state_21)[name = string("op_774_cast_fp16")]; tensor K_layer_cache_5_axes_0 = const()[name = string("K_layer_cache_5_axes_0"), val = tensor([0])]; tensor K_layer_cache_5_cast_fp16 = squeeze(axes = K_layer_cache_5_axes_0, x = var_774_cast_fp16)[name = string("K_layer_cache_5_cast_fp16")]; tensor var_776_begin_0 = const()[name = string("op_776_begin_0"), val = tensor([18, 0, 0, 0])]; tensor var_776_end_0 = const()[name = string("op_776_end_0"), val = tensor([19, 8, 2048, 64])]; tensor var_776_end_mask_0 = const()[name = string("op_776_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_776_cast_fp16 = slice_by_index(begin = var_776_begin_0, end = var_776_end_0, end_mask = var_776_end_mask_0, x = coreml_update_state_21)[name = string("op_776_cast_fp16")]; tensor V_layer_cache_5_axes_0 = const()[name = string("V_layer_cache_5_axes_0"), val = tensor([0])]; tensor V_layer_cache_5_cast_fp16 = squeeze(axes = V_layer_cache_5_axes_0, x = var_776_cast_fp16)[name = string("V_layer_cache_5_cast_fp16")]; tensor x_67_axes_0 = const()[name = string("x_67_axes_0"), val = tensor([1])]; tensor x_67_cast_fp16 = expand_dims(axes = x_67_axes_0, x = K_layer_cache_5_cast_fp16)[name = string("x_67_cast_fp16")]; tensor var_785 = const()[name = string("op_785"), val = tensor([1, 4, 1, 1])]; tensor x_69_cast_fp16 = tile(reps = var_785, x = x_67_cast_fp16)[name = string("x_69_cast_fp16")]; tensor var_789 = const()[name = string("op_789"), val = tensor([1, -1, 2048, 64])]; tensor var_790_cast_fp16 = reshape(shape = var_789, x = x_69_cast_fp16)[name = string("op_790_cast_fp16")]; tensor x_73_axes_0 = const()[name = string("x_73_axes_0"), val = tensor([1])]; tensor x_73_cast_fp16 = expand_dims(axes = x_73_axes_0, x = V_layer_cache_5_cast_fp16)[name = string("x_73_cast_fp16")]; tensor var_792 = const()[name = string("op_792"), val = tensor([1, 4, 1, 1])]; tensor x_75_cast_fp16 = tile(reps = var_792, x = x_73_cast_fp16)[name = string("x_75_cast_fp16")]; bool var_799_transpose_x_0 = const()[name = string("op_799_transpose_x_0"), val = bool(false)]; bool var_799_transpose_y_0 = const()[name = string("op_799_transpose_y_0"), val = bool(true)]; tensor var_799_cast_fp16 = matmul(transpose_x = var_799_transpose_x_0, transpose_y = var_799_transpose_y_0, x = rotated_9, y = var_790_cast_fp16)[name = string("op_799_cast_fp16")]; fp16 var_800_to_fp16 = const()[name = string("op_800_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_5_cast_fp16 = mul(x = var_799_cast_fp16, y = var_800_to_fp16)[name = string("attn_weights_5_cast_fp16")]; tensor x_77_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = causal_mask)[name = string("x_77_cast_fp16")]; tensor reduce_max_2_axes_0 = const()[name = string("reduce_max_2_axes_0"), val = tensor([-1])]; bool reduce_max_2_keep_dims_0 = const()[name = string("reduce_max_2_keep_dims_0"), val = bool(true)]; tensor reduce_max_2_cast_fp16 = reduce_max(axes = reduce_max_2_axes_0, keep_dims = reduce_max_2_keep_dims_0, x = x_77_cast_fp16)[name = string("reduce_max_2_cast_fp16")]; tensor x_79_cast_fp16 = sub(x = x_77_cast_fp16, y = reduce_max_2_cast_fp16)[name = string("x_79_cast_fp16")]; tensor exp_x_5_cast_fp16 = exp(x = x_79_cast_fp16)[name = string("exp_x_5_cast_fp16")]; tensor var_811_axes_0 = const()[name = string("op_811_axes_0"), val = tensor([-1])]; bool var_811_keep_dims_0 = const()[name = string("op_811_keep_dims_0"), val = bool(true)]; tensor var_811_cast_fp16 = reduce_sum(axes = var_811_axes_0, keep_dims = var_811_keep_dims_0, x = exp_x_5_cast_fp16)[name = string("op_811_cast_fp16")]; tensor var_812_cast_fp16 = real_div(x = exp_x_5_cast_fp16, y = var_811_cast_fp16)[name = string("op_812_cast_fp16")]; tensor concat_48 = const()[name = string("concat_48"), val = tensor([32, 64, 2048])]; tensor reshape_6_cast_fp16 = reshape(shape = concat_48, x = var_812_cast_fp16)[name = string("reshape_6_cast_fp16")]; tensor concat_49 = const()[name = string("concat_49"), val = tensor([32, 2048, 64])]; tensor reshape_7_cast_fp16 = reshape(shape = concat_49, x = x_75_cast_fp16)[name = string("reshape_7_cast_fp16")]; bool matmul_2_transpose_x_0 = const()[name = string("matmul_2_transpose_x_0"), val = bool(false)]; bool matmul_2_transpose_y_0 = const()[name = string("matmul_2_transpose_y_0"), val = bool(false)]; tensor matmul_2_cast_fp16 = matmul(transpose_x = matmul_2_transpose_x_0, transpose_y = matmul_2_transpose_y_0, x = reshape_6_cast_fp16, y = reshape_7_cast_fp16)[name = string("matmul_2_cast_fp16")]; tensor concat_53 = const()[name = string("concat_53"), val = tensor([1, 32, 64, 64])]; tensor reshape_8_cast_fp16 = reshape(shape = concat_53, x = matmul_2_cast_fp16)[name = string("reshape_8_cast_fp16")]; tensor var_815_perm_0 = const()[name = string("op_815_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_817 = const()[name = string("op_817"), val = tensor([1, 64, 2048])]; tensor var_815_cast_fp16 = transpose(perm = var_815_perm_0, x = reshape_8_cast_fp16)[name = string("transpose_37")]; tensor input_33_cast_fp16 = reshape(shape = var_817, x = var_815_cast_fp16)[name = string("input_33_cast_fp16")]; tensor model_model_layers_2_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(264977216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267074432))))[name = string("model_model_layers_2_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_2_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_2_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_33_cast_fp16)[name = string("linear_2_cast_fp16")]; tensor hidden_states_21_cast_fp16 = add(x = hidden_states_17_cast_fp16, y = linear_2_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; tensor mean_11_axes_0 = const()[name = string("mean_11_axes_0"), val = tensor([-1])]; bool mean_11_keep_dims_0 = const()[name = string("mean_11_keep_dims_0"), val = bool(true)]; tensor mean_11_cast_fp16 = reduce_mean(axes = mean_11_axes_0, keep_dims = mean_11_keep_dims_0, x = hidden_states_21_cast_fp16)[name = string("mean_11_cast_fp16")]; tensor input_35_cast_fp16 = sub(x = hidden_states_21_cast_fp16, y = mean_11_cast_fp16)[name = string("input_35_cast_fp16")]; tensor var_828_axes_0 = const()[name = string("op_828_axes_0"), val = tensor([-1])]; tensor model_model_layers_2_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_2_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267082688)))]; tensor var_828_cast_fp16 = layer_norm(axes = var_828_axes_0, epsilon = var_46_to_fp16, gamma = model_model_layers_2_post_attention_layernorm_weight_to_fp16, x = input_35_cast_fp16)[name = string("op_828_cast_fp16")]; tensor var_835 = const()[name = string("op_835"), val = tensor([0, 2, 1])]; tensor input_37_axes_0 = const()[name = string("input_37_axes_0"), val = tensor([2])]; tensor var_836 = transpose(perm = var_835, x = var_828_cast_fp16)[name = string("transpose_36")]; tensor input_37 = expand_dims(axes = input_37_axes_0, x = var_836)[name = string("input_37")]; string input_39_pad_type_0 = const()[name = string("input_39_pad_type_0"), val = string("valid")]; tensor input_39_strides_0 = const()[name = string("input_39_strides_0"), val = tensor([1, 1])]; tensor input_39_pad_0 = const()[name = string("input_39_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_39_dilations_0 = const()[name = string("input_39_dilations_0"), val = tensor([1, 1])]; int32 input_39_groups_0 = const()[name = string("input_39_groups_0"), val = int32(1)]; tensor input_39 = conv(dilations = input_39_dilations_0, groups = input_39_groups_0, pad = input_39_pad_0, pad_type = input_39_pad_type_0, strides = input_39_strides_0, weight = model_model_layers_2_mlp_gate_proj_weight_palettized, x = input_37)[name = string("input_39")]; string up_states_5_pad_type_0 = const()[name = string("up_states_5_pad_type_0"), val = string("valid")]; tensor up_states_5_strides_0 = const()[name = string("up_states_5_strides_0"), val = tensor([1, 1])]; tensor up_states_5_pad_0 = const()[name = string("up_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_5_dilations_0 = const()[name = string("up_states_5_dilations_0"), val = tensor([1, 1])]; int32 up_states_5_groups_0 = const()[name = string("up_states_5_groups_0"), val = int32(1)]; tensor up_states_5 = conv(dilations = up_states_5_dilations_0, groups = up_states_5_groups_0, pad = up_states_5_pad_0, pad_type = up_states_5_pad_type_0, strides = up_states_5_strides_0, weight = model_model_layers_2_mlp_up_proj_weight_palettized, x = input_37)[name = string("up_states_5")]; tensor gate_states_5 = silu(x = input_39)[name = string("gate_states_5")]; tensor input_41 = mul(x = gate_states_5, y = up_states_5)[name = string("input_41")]; string hidden_states_23_pad_type_0 = const()[name = string("hidden_states_23_pad_type_0"), val = string("valid")]; tensor hidden_states_23_strides_0 = const()[name = string("hidden_states_23_strides_0"), val = tensor([1, 1])]; tensor hidden_states_23_pad_0 = const()[name = string("hidden_states_23_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_23_dilations_0 = const()[name = string("hidden_states_23_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_23_groups_0 = const()[name = string("hidden_states_23_groups_0"), val = int32(1)]; tensor hidden_states_23 = conv(dilations = hidden_states_23_dilations_0, groups = hidden_states_23_groups_0, pad = hidden_states_23_pad_0, pad_type = hidden_states_23_pad_type_0, strides = hidden_states_23_strides_0, weight = model_model_layers_2_mlp_down_proj_weight_palettized, x = input_41)[name = string("hidden_states_23")]; tensor var_858_axes_0 = const()[name = string("op_858_axes_0"), val = tensor([2])]; tensor var_858 = squeeze(axes = var_858_axes_0, x = hidden_states_23)[name = string("op_858")]; tensor var_859 = const()[name = string("op_859"), val = tensor([0, 2, 1])]; tensor var_860 = transpose(perm = var_859, x = var_858)[name = string("transpose_35")]; tensor hidden_states_25_cast_fp16 = add(x = hidden_states_21_cast_fp16, y = var_860)[name = string("hidden_states_25_cast_fp16")]; tensor mean_13_axes_0 = const()[name = string("mean_13_axes_0"), val = tensor([-1])]; bool mean_13_keep_dims_0 = const()[name = string("mean_13_keep_dims_0"), val = bool(true)]; tensor mean_13_cast_fp16 = reduce_mean(axes = mean_13_axes_0, keep_dims = mean_13_keep_dims_0, x = hidden_states_25_cast_fp16)[name = string("mean_13_cast_fp16")]; tensor input_43_cast_fp16 = sub(x = hidden_states_25_cast_fp16, y = mean_13_cast_fp16)[name = string("input_43_cast_fp16")]; tensor var_868_axes_0 = const()[name = string("op_868_axes_0"), val = tensor([-1])]; tensor model_model_layers_3_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_3_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267086848)))]; tensor var_868_cast_fp16 = layer_norm(axes = var_868_axes_0, epsilon = var_46_to_fp16, gamma = model_model_layers_3_input_layernorm_weight_to_fp16, x = input_43_cast_fp16)[name = string("op_868_cast_fp16")]; tensor var_872 = const()[name = string("op_872"), val = tensor([0, 2, 1])]; tensor var_874_axes_0 = const()[name = string("op_874_axes_0"), val = tensor([2])]; tensor var_873 = transpose(perm = var_872, x = var_868_cast_fp16)[name = string("transpose_34")]; tensor var_874 = expand_dims(axes = var_874_axes_0, x = var_873)[name = string("op_874")]; string query_states_13_pad_type_0 = const()[name = string("query_states_13_pad_type_0"), val = string("valid")]; tensor query_states_13_strides_0 = const()[name = string("query_states_13_strides_0"), val = tensor([1, 1])]; tensor query_states_13_pad_0 = const()[name = string("query_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_13_dilations_0 = const()[name = string("query_states_13_dilations_0"), val = tensor([1, 1])]; int32 query_states_13_groups_0 = const()[name = string("query_states_13_groups_0"), val = int32(1)]; tensor query_states_13 = conv(dilations = query_states_13_dilations_0, groups = query_states_13_groups_0, pad = query_states_13_pad_0, pad_type = query_states_13_pad_type_0, strides = query_states_13_strides_0, weight = model_model_layers_3_self_attn_q_proj_weight_palettized, x = var_874)[name = string("query_states_13")]; string key_states_19_pad_type_0 = const()[name = string("key_states_19_pad_type_0"), val = string("valid")]; tensor key_states_19_strides_0 = const()[name = string("key_states_19_strides_0"), val = tensor([1, 1])]; tensor key_states_19_pad_0 = const()[name = string("key_states_19_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_19_dilations_0 = const()[name = string("key_states_19_dilations_0"), val = tensor([1, 1])]; int32 key_states_19_groups_0 = const()[name = string("key_states_19_groups_0"), val = int32(1)]; tensor key_states_19 = conv(dilations = key_states_19_dilations_0, groups = key_states_19_groups_0, pad = key_states_19_pad_0, pad_type = key_states_19_pad_type_0, strides = key_states_19_strides_0, weight = model_model_layers_3_self_attn_k_proj_weight_palettized, x = var_874)[name = string("key_states_19")]; string value_states_19_pad_type_0 = const()[name = string("value_states_19_pad_type_0"), val = string("valid")]; tensor value_states_19_strides_0 = const()[name = string("value_states_19_strides_0"), val = tensor([1, 1])]; tensor value_states_19_pad_0 = const()[name = string("value_states_19_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_19_dilations_0 = const()[name = string("value_states_19_dilations_0"), val = tensor([1, 1])]; int32 value_states_19_groups_0 = const()[name = string("value_states_19_groups_0"), val = int32(1)]; tensor value_states_19 = conv(dilations = value_states_19_dilations_0, groups = value_states_19_groups_0, pad = value_states_19_pad_0, pad_type = value_states_19_pad_type_0, strides = value_states_19_strides_0, weight = model_model_layers_3_self_attn_v_proj_weight_palettized, x = var_874)[name = string("value_states_19")]; tensor var_894 = const()[name = string("op_894"), val = tensor([1, 32, 64, 64])]; tensor var_895 = reshape(shape = var_894, x = query_states_13)[name = string("op_895")]; tensor var_896 = const()[name = string("op_896"), val = tensor([0, 1, 3, 2])]; tensor var_898 = const()[name = string("op_898"), val = tensor([1, 8, 64, 64])]; tensor var_899 = reshape(shape = var_898, x = key_states_19)[name = string("op_899")]; tensor var_900 = const()[name = string("op_900"), val = tensor([0, 1, 3, 2])]; tensor var_902 = const()[name = string("op_902"), val = tensor([1, 8, 64, 64])]; tensor var_903 = reshape(shape = var_902, x = value_states_19)[name = string("op_903")]; tensor var_904 = const()[name = string("op_904"), val = tensor([0, 1, 3, 2])]; tensor x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor([1, 32, 64, 32])]; tensor x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_85 = transpose(perm = var_896, x = var_895)[name = string("transpose_33")]; tensor x1_13 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = x_85)[name = string("x1_13")]; tensor x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor([1, 32, 64, 64])]; tensor x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_13 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = x_85)[name = string("x2_13")]; tensor var_922 = mul(x = x1_13, y = cos_7)[name = string("op_922")]; tensor var_923 = mul(x = x2_13, y = sin_7)[name = string("op_923")]; tensor var_924 = sub(x = var_922, y = var_923)[name = string("op_924")]; tensor var_925 = mul(x = x2_13, y = cos_7)[name = string("op_925")]; tensor var_926 = mul(x = x1_13, y = sin_7)[name = string("op_926")]; tensor var_927 = add(x = var_925, y = var_926)[name = string("op_927")]; bool rotated_13_interleave_0 = const()[name = string("rotated_13_interleave_0"), val = bool(false)]; tensor rotated_13 = concat(axis = var_44, interleave = rotated_13_interleave_0, values = (var_924, var_927))[name = string("rotated_13")]; tensor x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor([1, 8, 64, 32])]; tensor x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_89 = transpose(perm = var_900, x = var_899)[name = string("transpose_32")]; tensor x1_15 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = x_89)[name = string("x1_15")]; tensor x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor([1, 8, 64, 64])]; tensor x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_15 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = x_89)[name = string("x2_15")]; tensor var_943 = mul(x = x1_15, y = cos_7)[name = string("op_943")]; tensor var_944 = mul(x = x2_15, y = sin_7)[name = string("op_944")]; tensor var_945 = sub(x = var_943, y = var_944)[name = string("op_945")]; tensor var_946 = mul(x = x2_15, y = cos_7)[name = string("op_946")]; tensor var_947 = mul(x = x1_15, y = sin_7)[name = string("op_947")]; tensor var_948 = add(x = var_946, y = var_947)[name = string("op_948")]; bool rotated_15_interleave_0 = const()[name = string("rotated_15_interleave_0"), val = bool(false)]; tensor rotated_15 = concat(axis = var_44, interleave = rotated_15_interleave_0, values = (var_945, var_948))[name = string("rotated_15")]; tensor expand_dims_36 = const()[name = string("expand_dims_36"), val = tensor([3])]; tensor expand_dims_37 = const()[name = string("expand_dims_37"), val = tensor([0])]; tensor expand_dims_39 = const()[name = string("expand_dims_39"), val = tensor([0])]; tensor expand_dims_40 = const()[name = string("expand_dims_40"), val = tensor([4])]; int32 concat_56_axis_0 = const()[name = string("concat_56_axis_0"), val = int32(0)]; bool concat_56_interleave_0 = const()[name = string("concat_56_interleave_0"), val = bool(false)]; tensor concat_56 = concat(axis = concat_56_axis_0, interleave = concat_56_interleave_0, values = (expand_dims_36, expand_dims_37, current_pos, expand_dims_39))[name = string("concat_56")]; tensor concat_57_values1_0 = const()[name = string("concat_57_values1_0"), val = tensor([0])]; tensor concat_57_values3_0 = const()[name = string("concat_57_values3_0"), val = tensor([0])]; int32 concat_57_axis_0 = const()[name = string("concat_57_axis_0"), val = int32(0)]; bool concat_57_interleave_0 = const()[name = string("concat_57_interleave_0"), val = bool(false)]; tensor concat_57 = concat(axis = concat_57_axis_0, interleave = concat_57_interleave_0, values = (expand_dims_40, concat_57_values1_0, var_366, concat_57_values3_0))[name = string("concat_57")]; tensor model_model_kv_cache_0_internal_tensor_assign_7_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_56, begin_mask = model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0, end = concat_57, end_mask = model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_7_stride_0, update = rotated_15, x = coreml_update_state_21)[name = string("model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_22_write_state")]; tensor coreml_update_state_22 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_22")]; tensor expand_dims_42 = const()[name = string("expand_dims_42"), val = tensor([19])]; tensor expand_dims_43 = const()[name = string("expand_dims_43"), val = tensor([0])]; tensor expand_dims_45 = const()[name = string("expand_dims_45"), val = tensor([0])]; tensor expand_dims_46 = const()[name = string("expand_dims_46"), val = tensor([20])]; int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)]; bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)]; tensor concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (expand_dims_42, expand_dims_43, current_pos, expand_dims_45))[name = string("concat_60")]; tensor concat_61_values1_0 = const()[name = string("concat_61_values1_0"), val = tensor([0])]; tensor concat_61_values3_0 = const()[name = string("concat_61_values3_0"), val = tensor([0])]; int32 concat_61_axis_0 = const()[name = string("concat_61_axis_0"), val = int32(0)]; bool concat_61_interleave_0 = const()[name = string("concat_61_interleave_0"), val = bool(false)]; tensor concat_61 = concat(axis = concat_61_axis_0, interleave = concat_61_interleave_0, values = (expand_dims_46, concat_61_values1_0, var_366, concat_61_values3_0))[name = string("concat_61")]; tensor model_model_kv_cache_0_internal_tensor_assign_8_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_21 = transpose(perm = var_904, x = var_903)[name = string("transpose_31")]; tensor model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_60, begin_mask = model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0, end = concat_61, end_mask = model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_8_stride_0, update = value_states_21, x = coreml_update_state_22)[name = string("model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_23_write_state")]; tensor coreml_update_state_23 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_23")]; tensor var_971_begin_0 = const()[name = string("op_971_begin_0"), val = tensor([3, 0, 0, 0])]; tensor var_971_end_0 = const()[name = string("op_971_end_0"), val = tensor([4, 8, 2048, 64])]; tensor var_971_end_mask_0 = const()[name = string("op_971_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_971_cast_fp16 = slice_by_index(begin = var_971_begin_0, end = var_971_end_0, end_mask = var_971_end_mask_0, x = coreml_update_state_23)[name = string("op_971_cast_fp16")]; tensor K_layer_cache_7_axes_0 = const()[name = string("K_layer_cache_7_axes_0"), val = tensor([0])]; tensor K_layer_cache_7_cast_fp16 = squeeze(axes = K_layer_cache_7_axes_0, x = var_971_cast_fp16)[name = string("K_layer_cache_7_cast_fp16")]; tensor var_973_begin_0 = const()[name = string("op_973_begin_0"), val = tensor([19, 0, 0, 0])]; tensor var_973_end_0 = const()[name = string("op_973_end_0"), val = tensor([20, 8, 2048, 64])]; tensor var_973_end_mask_0 = const()[name = string("op_973_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_973_cast_fp16 = slice_by_index(begin = var_973_begin_0, end = var_973_end_0, end_mask = var_973_end_mask_0, x = coreml_update_state_23)[name = string("op_973_cast_fp16")]; tensor V_layer_cache_7_axes_0 = const()[name = string("V_layer_cache_7_axes_0"), val = tensor([0])]; tensor V_layer_cache_7_cast_fp16 = squeeze(axes = V_layer_cache_7_axes_0, x = var_973_cast_fp16)[name = string("V_layer_cache_7_cast_fp16")]; tensor x_95_axes_0 = const()[name = string("x_95_axes_0"), val = tensor([1])]; tensor x_95_cast_fp16 = expand_dims(axes = x_95_axes_0, x = K_layer_cache_7_cast_fp16)[name = string("x_95_cast_fp16")]; tensor var_982 = const()[name = string("op_982"), val = tensor([1, 4, 1, 1])]; tensor x_97_cast_fp16 = tile(reps = var_982, x = x_95_cast_fp16)[name = string("x_97_cast_fp16")]; tensor var_986 = const()[name = string("op_986"), val = tensor([1, -1, 2048, 64])]; tensor var_987_cast_fp16 = reshape(shape = var_986, x = x_97_cast_fp16)[name = string("op_987_cast_fp16")]; tensor x_101_axes_0 = const()[name = string("x_101_axes_0"), val = tensor([1])]; tensor x_101_cast_fp16 = expand_dims(axes = x_101_axes_0, x = V_layer_cache_7_cast_fp16)[name = string("x_101_cast_fp16")]; tensor var_989 = const()[name = string("op_989"), val = tensor([1, 4, 1, 1])]; tensor x_103_cast_fp16 = tile(reps = var_989, x = x_101_cast_fp16)[name = string("x_103_cast_fp16")]; bool var_996_transpose_x_0 = const()[name = string("op_996_transpose_x_0"), val = bool(false)]; bool var_996_transpose_y_0 = const()[name = string("op_996_transpose_y_0"), val = bool(true)]; tensor var_996_cast_fp16 = matmul(transpose_x = var_996_transpose_x_0, transpose_y = var_996_transpose_y_0, x = rotated_13, y = var_987_cast_fp16)[name = string("op_996_cast_fp16")]; fp16 var_997_to_fp16 = const()[name = string("op_997_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_7_cast_fp16 = mul(x = var_996_cast_fp16, y = var_997_to_fp16)[name = string("attn_weights_7_cast_fp16")]; tensor x_105_cast_fp16 = add(x = attn_weights_7_cast_fp16, y = causal_mask)[name = string("x_105_cast_fp16")]; tensor reduce_max_3_axes_0 = const()[name = string("reduce_max_3_axes_0"), val = tensor([-1])]; bool reduce_max_3_keep_dims_0 = const()[name = string("reduce_max_3_keep_dims_0"), val = bool(true)]; tensor reduce_max_3_cast_fp16 = reduce_max(axes = reduce_max_3_axes_0, keep_dims = reduce_max_3_keep_dims_0, x = x_105_cast_fp16)[name = string("reduce_max_3_cast_fp16")]; tensor x_107_cast_fp16 = sub(x = x_105_cast_fp16, y = reduce_max_3_cast_fp16)[name = string("x_107_cast_fp16")]; tensor exp_x_7_cast_fp16 = exp(x = x_107_cast_fp16)[name = string("exp_x_7_cast_fp16")]; tensor var_1008_axes_0 = const()[name = string("op_1008_axes_0"), val = tensor([-1])]; bool var_1008_keep_dims_0 = const()[name = string("op_1008_keep_dims_0"), val = bool(true)]; tensor var_1008_cast_fp16 = reduce_sum(axes = var_1008_axes_0, keep_dims = var_1008_keep_dims_0, x = exp_x_7_cast_fp16)[name = string("op_1008_cast_fp16")]; tensor var_1009_cast_fp16 = real_div(x = exp_x_7_cast_fp16, y = var_1008_cast_fp16)[name = string("op_1009_cast_fp16")]; tensor concat_66 = const()[name = string("concat_66"), val = tensor([32, 64, 2048])]; tensor reshape_9_cast_fp16 = reshape(shape = concat_66, x = var_1009_cast_fp16)[name = string("reshape_9_cast_fp16")]; tensor concat_67 = const()[name = string("concat_67"), val = tensor([32, 2048, 64])]; tensor reshape_10_cast_fp16 = reshape(shape = concat_67, x = x_103_cast_fp16)[name = string("reshape_10_cast_fp16")]; bool matmul_3_transpose_x_0 = const()[name = string("matmul_3_transpose_x_0"), val = bool(false)]; bool matmul_3_transpose_y_0 = const()[name = string("matmul_3_transpose_y_0"), val = bool(false)]; tensor matmul_3_cast_fp16 = matmul(transpose_x = matmul_3_transpose_x_0, transpose_y = matmul_3_transpose_y_0, x = reshape_9_cast_fp16, y = reshape_10_cast_fp16)[name = string("matmul_3_cast_fp16")]; tensor concat_71 = const()[name = string("concat_71"), val = tensor([1, 32, 64, 64])]; tensor reshape_11_cast_fp16 = reshape(shape = concat_71, x = matmul_3_cast_fp16)[name = string("reshape_11_cast_fp16")]; tensor var_1012_perm_0 = const()[name = string("op_1012_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1014 = const()[name = string("op_1014"), val = tensor([1, 64, 2048])]; tensor var_1012_cast_fp16 = transpose(perm = var_1012_perm_0, x = reshape_11_cast_fp16)[name = string("transpose_30")]; tensor input_47_cast_fp16 = reshape(shape = var_1014, x = var_1012_cast_fp16)[name = string("input_47_cast_fp16")]; tensor model_model_layers_3_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267091008))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269188224))))[name = string("model_model_layers_3_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_3_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_3_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_47_cast_fp16)[name = string("linear_3_cast_fp16")]; tensor hidden_states_29_cast_fp16 = add(x = hidden_states_25_cast_fp16, y = linear_3_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; tensor mean_15_axes_0 = const()[name = string("mean_15_axes_0"), val = tensor([-1])]; bool mean_15_keep_dims_0 = const()[name = string("mean_15_keep_dims_0"), val = bool(true)]; tensor mean_15_cast_fp16 = reduce_mean(axes = mean_15_axes_0, keep_dims = mean_15_keep_dims_0, x = hidden_states_29_cast_fp16)[name = string("mean_15_cast_fp16")]; tensor input_49_cast_fp16 = sub(x = hidden_states_29_cast_fp16, y = mean_15_cast_fp16)[name = string("input_49_cast_fp16")]; tensor var_1025_axes_0 = const()[name = string("op_1025_axes_0"), val = tensor([-1])]; tensor model_model_layers_3_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_3_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269196480)))]; tensor var_1025_cast_fp16 = layer_norm(axes = var_1025_axes_0, epsilon = var_46_to_fp16, gamma = model_model_layers_3_post_attention_layernorm_weight_to_fp16, x = input_49_cast_fp16)[name = string("op_1025_cast_fp16")]; tensor var_1032 = const()[name = string("op_1032"), val = tensor([0, 2, 1])]; tensor input_51_axes_0 = const()[name = string("input_51_axes_0"), val = tensor([2])]; tensor var_1033 = transpose(perm = var_1032, x = var_1025_cast_fp16)[name = string("transpose_29")]; tensor input_51 = expand_dims(axes = input_51_axes_0, x = var_1033)[name = string("input_51")]; string input_53_pad_type_0 = const()[name = string("input_53_pad_type_0"), val = string("valid")]; tensor input_53_strides_0 = const()[name = string("input_53_strides_0"), val = tensor([1, 1])]; tensor input_53_pad_0 = const()[name = string("input_53_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_53_dilations_0 = const()[name = string("input_53_dilations_0"), val = tensor([1, 1])]; int32 input_53_groups_0 = const()[name = string("input_53_groups_0"), val = int32(1)]; tensor input_53 = conv(dilations = input_53_dilations_0, groups = input_53_groups_0, pad = input_53_pad_0, pad_type = input_53_pad_type_0, strides = input_53_strides_0, weight = model_model_layers_3_mlp_gate_proj_weight_palettized, x = input_51)[name = string("input_53")]; string up_states_7_pad_type_0 = const()[name = string("up_states_7_pad_type_0"), val = string("valid")]; tensor up_states_7_strides_0 = const()[name = string("up_states_7_strides_0"), val = tensor([1, 1])]; tensor up_states_7_pad_0 = const()[name = string("up_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_7_dilations_0 = const()[name = string("up_states_7_dilations_0"), val = tensor([1, 1])]; int32 up_states_7_groups_0 = const()[name = string("up_states_7_groups_0"), val = int32(1)]; tensor up_states_7 = conv(dilations = up_states_7_dilations_0, groups = up_states_7_groups_0, pad = up_states_7_pad_0, pad_type = up_states_7_pad_type_0, strides = up_states_7_strides_0, weight = model_model_layers_3_mlp_up_proj_weight_palettized, x = input_51)[name = string("up_states_7")]; tensor gate_states_7 = silu(x = input_53)[name = string("gate_states_7")]; tensor input_55 = mul(x = gate_states_7, y = up_states_7)[name = string("input_55")]; string hidden_states_31_pad_type_0 = const()[name = string("hidden_states_31_pad_type_0"), val = string("valid")]; tensor hidden_states_31_strides_0 = const()[name = string("hidden_states_31_strides_0"), val = tensor([1, 1])]; tensor hidden_states_31_pad_0 = const()[name = string("hidden_states_31_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_31_dilations_0 = const()[name = string("hidden_states_31_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_31_groups_0 = const()[name = string("hidden_states_31_groups_0"), val = int32(1)]; tensor hidden_states_31 = conv(dilations = hidden_states_31_dilations_0, groups = hidden_states_31_groups_0, pad = hidden_states_31_pad_0, pad_type = hidden_states_31_pad_type_0, strides = hidden_states_31_strides_0, weight = model_model_layers_3_mlp_down_proj_weight_palettized, x = input_55)[name = string("hidden_states_31")]; tensor var_1055_axes_0 = const()[name = string("op_1055_axes_0"), val = tensor([2])]; tensor var_1055 = squeeze(axes = var_1055_axes_0, x = hidden_states_31)[name = string("op_1055")]; tensor var_1056 = const()[name = string("op_1056"), val = tensor([0, 2, 1])]; tensor var_1057 = transpose(perm = var_1056, x = var_1055)[name = string("transpose_28")]; tensor hidden_states_33_cast_fp16 = add(x = hidden_states_29_cast_fp16, y = var_1057)[name = string("hidden_states_33_cast_fp16")]; tensor mean_17_axes_0 = const()[name = string("mean_17_axes_0"), val = tensor([-1])]; bool mean_17_keep_dims_0 = const()[name = string("mean_17_keep_dims_0"), val = bool(true)]; tensor mean_17_cast_fp16 = reduce_mean(axes = mean_17_axes_0, keep_dims = mean_17_keep_dims_0, x = hidden_states_33_cast_fp16)[name = string("mean_17_cast_fp16")]; tensor input_57_cast_fp16 = sub(x = hidden_states_33_cast_fp16, y = mean_17_cast_fp16)[name = string("input_57_cast_fp16")]; tensor var_1065_axes_0 = const()[name = string("op_1065_axes_0"), val = tensor([-1])]; tensor model_model_layers_4_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_4_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269200640)))]; tensor var_1065_cast_fp16 = layer_norm(axes = var_1065_axes_0, epsilon = var_46_to_fp16, gamma = model_model_layers_4_input_layernorm_weight_to_fp16, x = input_57_cast_fp16)[name = string("op_1065_cast_fp16")]; tensor var_1069 = const()[name = string("op_1069"), val = tensor([0, 2, 1])]; tensor var_1071_axes_0 = const()[name = string("op_1071_axes_0"), val = tensor([2])]; tensor var_1070 = transpose(perm = var_1069, x = var_1065_cast_fp16)[name = string("transpose_27")]; tensor var_1071 = expand_dims(axes = var_1071_axes_0, x = var_1070)[name = string("op_1071")]; string query_states_17_pad_type_0 = const()[name = string("query_states_17_pad_type_0"), val = string("valid")]; tensor query_states_17_strides_0 = const()[name = string("query_states_17_strides_0"), val = tensor([1, 1])]; tensor query_states_17_pad_0 = const()[name = string("query_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_17_dilations_0 = const()[name = string("query_states_17_dilations_0"), val = tensor([1, 1])]; int32 query_states_17_groups_0 = const()[name = string("query_states_17_groups_0"), val = int32(1)]; tensor query_states_17 = conv(dilations = query_states_17_dilations_0, groups = query_states_17_groups_0, pad = query_states_17_pad_0, pad_type = query_states_17_pad_type_0, strides = query_states_17_strides_0, weight = model_model_layers_4_self_attn_q_proj_weight_palettized, x = var_1071)[name = string("query_states_17")]; string key_states_25_pad_type_0 = const()[name = string("key_states_25_pad_type_0"), val = string("valid")]; tensor key_states_25_strides_0 = const()[name = string("key_states_25_strides_0"), val = tensor([1, 1])]; tensor key_states_25_pad_0 = const()[name = string("key_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_25_dilations_0 = const()[name = string("key_states_25_dilations_0"), val = tensor([1, 1])]; int32 key_states_25_groups_0 = const()[name = string("key_states_25_groups_0"), val = int32(1)]; tensor key_states_25 = conv(dilations = key_states_25_dilations_0, groups = key_states_25_groups_0, pad = key_states_25_pad_0, pad_type = key_states_25_pad_type_0, strides = key_states_25_strides_0, weight = model_model_layers_4_self_attn_k_proj_weight_palettized, x = var_1071)[name = string("key_states_25")]; string value_states_25_pad_type_0 = const()[name = string("value_states_25_pad_type_0"), val = string("valid")]; tensor value_states_25_strides_0 = const()[name = string("value_states_25_strides_0"), val = tensor([1, 1])]; tensor value_states_25_pad_0 = const()[name = string("value_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_25_dilations_0 = const()[name = string("value_states_25_dilations_0"), val = tensor([1, 1])]; int32 value_states_25_groups_0 = const()[name = string("value_states_25_groups_0"), val = int32(1)]; tensor value_states_25 = conv(dilations = value_states_25_dilations_0, groups = value_states_25_groups_0, pad = value_states_25_pad_0, pad_type = value_states_25_pad_type_0, strides = value_states_25_strides_0, weight = model_model_layers_4_self_attn_v_proj_weight_palettized, x = var_1071)[name = string("value_states_25")]; tensor var_1091 = const()[name = string("op_1091"), val = tensor([1, 32, 64, 64])]; tensor var_1092 = reshape(shape = var_1091, x = query_states_17)[name = string("op_1092")]; tensor var_1093 = const()[name = string("op_1093"), val = tensor([0, 1, 3, 2])]; tensor var_1095 = const()[name = string("op_1095"), val = tensor([1, 8, 64, 64])]; tensor var_1096 = reshape(shape = var_1095, x = key_states_25)[name = string("op_1096")]; tensor var_1097 = const()[name = string("op_1097"), val = tensor([0, 1, 3, 2])]; tensor var_1099 = const()[name = string("op_1099"), val = tensor([1, 8, 64, 64])]; tensor var_1100 = reshape(shape = var_1099, x = value_states_25)[name = string("op_1100")]; tensor var_1101 = const()[name = string("op_1101"), val = tensor([0, 1, 3, 2])]; tensor x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor([1, 32, 64, 32])]; tensor x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_113 = transpose(perm = var_1093, x = var_1092)[name = string("transpose_26")]; tensor x1_17 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = x_113)[name = string("x1_17")]; tensor x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor([1, 32, 64, 64])]; tensor x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_17 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = x_113)[name = string("x2_17")]; tensor var_1119 = mul(x = x1_17, y = cos_7)[name = string("op_1119")]; tensor var_1120 = mul(x = x2_17, y = sin_7)[name = string("op_1120")]; tensor var_1121 = sub(x = var_1119, y = var_1120)[name = string("op_1121")]; tensor var_1122 = mul(x = x2_17, y = cos_7)[name = string("op_1122")]; tensor var_1123 = mul(x = x1_17, y = sin_7)[name = string("op_1123")]; tensor var_1124 = add(x = var_1122, y = var_1123)[name = string("op_1124")]; bool rotated_17_interleave_0 = const()[name = string("rotated_17_interleave_0"), val = bool(false)]; tensor rotated_17 = concat(axis = var_44, interleave = rotated_17_interleave_0, values = (var_1121, var_1124))[name = string("rotated_17")]; tensor x1_19_begin_0 = const()[name = string("x1_19_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_19_end_0 = const()[name = string("x1_19_end_0"), val = tensor([1, 8, 64, 32])]; tensor x1_19_end_mask_0 = const()[name = string("x1_19_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_117 = transpose(perm = var_1097, x = var_1096)[name = string("transpose_25")]; tensor x1_19 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = x_117)[name = string("x1_19")]; tensor x2_19_begin_0 = const()[name = string("x2_19_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_19_end_0 = const()[name = string("x2_19_end_0"), val = tensor([1, 8, 64, 64])]; tensor x2_19_end_mask_0 = const()[name = string("x2_19_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_19 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = x_117)[name = string("x2_19")]; tensor var_1140 = mul(x = x1_19, y = cos_7)[name = string("op_1140")]; tensor var_1141 = mul(x = x2_19, y = sin_7)[name = string("op_1141")]; tensor var_1142 = sub(x = var_1140, y = var_1141)[name = string("op_1142")]; tensor var_1143 = mul(x = x2_19, y = cos_7)[name = string("op_1143")]; tensor var_1144 = mul(x = x1_19, y = sin_7)[name = string("op_1144")]; tensor var_1145 = add(x = var_1143, y = var_1144)[name = string("op_1145")]; bool rotated_19_interleave_0 = const()[name = string("rotated_19_interleave_0"), val = bool(false)]; tensor rotated_19 = concat(axis = var_44, interleave = rotated_19_interleave_0, values = (var_1142, var_1145))[name = string("rotated_19")]; tensor expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor([4])]; tensor expand_dims_49 = const()[name = string("expand_dims_49"), val = tensor([0])]; tensor expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor([0])]; tensor expand_dims_52 = const()[name = string("expand_dims_52"), val = tensor([5])]; int32 concat_74_axis_0 = const()[name = string("concat_74_axis_0"), val = int32(0)]; bool concat_74_interleave_0 = const()[name = string("concat_74_interleave_0"), val = bool(false)]; tensor concat_74 = concat(axis = concat_74_axis_0, interleave = concat_74_interleave_0, values = (expand_dims_48, expand_dims_49, current_pos, expand_dims_51))[name = string("concat_74")]; tensor concat_75_values1_0 = const()[name = string("concat_75_values1_0"), val = tensor([0])]; tensor concat_75_values3_0 = const()[name = string("concat_75_values3_0"), val = tensor([0])]; int32 concat_75_axis_0 = const()[name = string("concat_75_axis_0"), val = int32(0)]; bool concat_75_interleave_0 = const()[name = string("concat_75_interleave_0"), val = bool(false)]; tensor concat_75 = concat(axis = concat_75_axis_0, interleave = concat_75_interleave_0, values = (expand_dims_52, concat_75_values1_0, var_366, concat_75_values3_0))[name = string("concat_75")]; tensor model_model_kv_cache_0_internal_tensor_assign_9_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_74, begin_mask = model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0, end = concat_75, end_mask = model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_9_stride_0, update = rotated_19, x = coreml_update_state_23)[name = string("model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_24_write_state")]; tensor coreml_update_state_24 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_24")]; tensor expand_dims_54 = const()[name = string("expand_dims_54"), val = tensor([20])]; tensor expand_dims_55 = const()[name = string("expand_dims_55"), val = tensor([0])]; tensor expand_dims_57 = const()[name = string("expand_dims_57"), val = tensor([0])]; tensor expand_dims_58 = const()[name = string("expand_dims_58"), val = tensor([21])]; int32 concat_78_axis_0 = const()[name = string("concat_78_axis_0"), val = int32(0)]; bool concat_78_interleave_0 = const()[name = string("concat_78_interleave_0"), val = bool(false)]; tensor concat_78 = concat(axis = concat_78_axis_0, interleave = concat_78_interleave_0, values = (expand_dims_54, expand_dims_55, current_pos, expand_dims_57))[name = string("concat_78")]; tensor concat_79_values1_0 = const()[name = string("concat_79_values1_0"), val = tensor([0])]; tensor concat_79_values3_0 = const()[name = string("concat_79_values3_0"), val = tensor([0])]; int32 concat_79_axis_0 = const()[name = string("concat_79_axis_0"), val = int32(0)]; bool concat_79_interleave_0 = const()[name = string("concat_79_interleave_0"), val = bool(false)]; tensor concat_79 = concat(axis = concat_79_axis_0, interleave = concat_79_interleave_0, values = (expand_dims_58, concat_79_values1_0, var_366, concat_79_values3_0))[name = string("concat_79")]; tensor model_model_kv_cache_0_internal_tensor_assign_10_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_27 = transpose(perm = var_1101, x = var_1100)[name = string("transpose_24")]; tensor model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_78, begin_mask = model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0, end = concat_79, end_mask = model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_10_stride_0, update = value_states_27, x = coreml_update_state_24)[name = string("model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_25_write_state")]; tensor coreml_update_state_25 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_25")]; tensor var_1168_begin_0 = const()[name = string("op_1168_begin_0"), val = tensor([4, 0, 0, 0])]; tensor var_1168_end_0 = const()[name = string("op_1168_end_0"), val = tensor([5, 8, 2048, 64])]; tensor var_1168_end_mask_0 = const()[name = string("op_1168_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1168_cast_fp16 = slice_by_index(begin = var_1168_begin_0, end = var_1168_end_0, end_mask = var_1168_end_mask_0, x = coreml_update_state_25)[name = string("op_1168_cast_fp16")]; tensor K_layer_cache_9_axes_0 = const()[name = string("K_layer_cache_9_axes_0"), val = tensor([0])]; tensor K_layer_cache_9_cast_fp16 = squeeze(axes = K_layer_cache_9_axes_0, x = var_1168_cast_fp16)[name = string("K_layer_cache_9_cast_fp16")]; tensor var_1170_begin_0 = const()[name = string("op_1170_begin_0"), val = tensor([20, 0, 0, 0])]; tensor var_1170_end_0 = const()[name = string("op_1170_end_0"), val = tensor([21, 8, 2048, 64])]; tensor var_1170_end_mask_0 = const()[name = string("op_1170_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1170_cast_fp16 = slice_by_index(begin = var_1170_begin_0, end = var_1170_end_0, end_mask = var_1170_end_mask_0, x = coreml_update_state_25)[name = string("op_1170_cast_fp16")]; tensor V_layer_cache_9_axes_0 = const()[name = string("V_layer_cache_9_axes_0"), val = tensor([0])]; tensor V_layer_cache_9_cast_fp16 = squeeze(axes = V_layer_cache_9_axes_0, x = var_1170_cast_fp16)[name = string("V_layer_cache_9_cast_fp16")]; tensor x_123_axes_0 = const()[name = string("x_123_axes_0"), val = tensor([1])]; tensor x_123_cast_fp16 = expand_dims(axes = x_123_axes_0, x = K_layer_cache_9_cast_fp16)[name = string("x_123_cast_fp16")]; tensor var_1179 = const()[name = string("op_1179"), val = tensor([1, 4, 1, 1])]; tensor x_125_cast_fp16 = tile(reps = var_1179, x = x_123_cast_fp16)[name = string("x_125_cast_fp16")]; tensor var_1183 = const()[name = string("op_1183"), val = tensor([1, -1, 2048, 64])]; tensor var_1184_cast_fp16 = reshape(shape = var_1183, x = x_125_cast_fp16)[name = string("op_1184_cast_fp16")]; tensor x_129_axes_0 = const()[name = string("x_129_axes_0"), val = tensor([1])]; tensor x_129_cast_fp16 = expand_dims(axes = x_129_axes_0, x = V_layer_cache_9_cast_fp16)[name = string("x_129_cast_fp16")]; tensor var_1186 = const()[name = string("op_1186"), val = tensor([1, 4, 1, 1])]; tensor x_131_cast_fp16 = tile(reps = var_1186, x = x_129_cast_fp16)[name = string("x_131_cast_fp16")]; bool var_1193_transpose_x_0 = const()[name = string("op_1193_transpose_x_0"), val = bool(false)]; bool var_1193_transpose_y_0 = const()[name = string("op_1193_transpose_y_0"), val = bool(true)]; tensor var_1193_cast_fp16 = matmul(transpose_x = var_1193_transpose_x_0, transpose_y = var_1193_transpose_y_0, x = rotated_17, y = var_1184_cast_fp16)[name = string("op_1193_cast_fp16")]; fp16 var_1194_to_fp16 = const()[name = string("op_1194_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_9_cast_fp16 = mul(x = var_1193_cast_fp16, y = var_1194_to_fp16)[name = string("attn_weights_9_cast_fp16")]; tensor x_133_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = causal_mask)[name = string("x_133_cast_fp16")]; tensor reduce_max_4_axes_0 = const()[name = string("reduce_max_4_axes_0"), val = tensor([-1])]; bool reduce_max_4_keep_dims_0 = const()[name = string("reduce_max_4_keep_dims_0"), val = bool(true)]; tensor reduce_max_4_cast_fp16 = reduce_max(axes = reduce_max_4_axes_0, keep_dims = reduce_max_4_keep_dims_0, x = x_133_cast_fp16)[name = string("reduce_max_4_cast_fp16")]; tensor x_135_cast_fp16 = sub(x = x_133_cast_fp16, y = reduce_max_4_cast_fp16)[name = string("x_135_cast_fp16")]; tensor exp_x_9_cast_fp16 = exp(x = x_135_cast_fp16)[name = string("exp_x_9_cast_fp16")]; tensor var_1205_axes_0 = const()[name = string("op_1205_axes_0"), val = tensor([-1])]; bool var_1205_keep_dims_0 = const()[name = string("op_1205_keep_dims_0"), val = bool(true)]; tensor var_1205_cast_fp16 = reduce_sum(axes = var_1205_axes_0, keep_dims = var_1205_keep_dims_0, x = exp_x_9_cast_fp16)[name = string("op_1205_cast_fp16")]; tensor var_1206_cast_fp16 = real_div(x = exp_x_9_cast_fp16, y = var_1205_cast_fp16)[name = string("op_1206_cast_fp16")]; tensor concat_84 = const()[name = string("concat_84"), val = tensor([32, 64, 2048])]; tensor reshape_12_cast_fp16 = reshape(shape = concat_84, x = var_1206_cast_fp16)[name = string("reshape_12_cast_fp16")]; tensor concat_85 = const()[name = string("concat_85"), val = tensor([32, 2048, 64])]; tensor reshape_13_cast_fp16 = reshape(shape = concat_85, x = x_131_cast_fp16)[name = string("reshape_13_cast_fp16")]; bool matmul_4_transpose_x_0 = const()[name = string("matmul_4_transpose_x_0"), val = bool(false)]; bool matmul_4_transpose_y_0 = const()[name = string("matmul_4_transpose_y_0"), val = bool(false)]; tensor matmul_4_cast_fp16 = matmul(transpose_x = matmul_4_transpose_x_0, transpose_y = matmul_4_transpose_y_0, x = reshape_12_cast_fp16, y = reshape_13_cast_fp16)[name = string("matmul_4_cast_fp16")]; tensor concat_89 = const()[name = string("concat_89"), val = tensor([1, 32, 64, 64])]; tensor reshape_14_cast_fp16 = reshape(shape = concat_89, x = matmul_4_cast_fp16)[name = string("reshape_14_cast_fp16")]; tensor var_1209_perm_0 = const()[name = string("op_1209_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1211 = const()[name = string("op_1211"), val = tensor([1, 64, 2048])]; tensor var_1209_cast_fp16 = transpose(perm = var_1209_perm_0, x = reshape_14_cast_fp16)[name = string("transpose_23")]; tensor input_61_cast_fp16 = reshape(shape = var_1211, x = var_1209_cast_fp16)[name = string("input_61_cast_fp16")]; tensor model_model_layers_4_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269204800))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271302016))))[name = string("model_model_layers_4_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_4_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_4_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_61_cast_fp16)[name = string("linear_4_cast_fp16")]; tensor hidden_states_37_cast_fp16 = add(x = hidden_states_33_cast_fp16, y = linear_4_cast_fp16)[name = string("hidden_states_37_cast_fp16")]; tensor mean_19_axes_0 = const()[name = string("mean_19_axes_0"), val = tensor([-1])]; bool mean_19_keep_dims_0 = const()[name = string("mean_19_keep_dims_0"), val = bool(true)]; tensor mean_19_cast_fp16 = reduce_mean(axes = mean_19_axes_0, keep_dims = mean_19_keep_dims_0, x = hidden_states_37_cast_fp16)[name = string("mean_19_cast_fp16")]; tensor input_63_cast_fp16 = sub(x = hidden_states_37_cast_fp16, y = mean_19_cast_fp16)[name = string("input_63_cast_fp16")]; tensor var_1222_axes_0 = const()[name = string("op_1222_axes_0"), val = tensor([-1])]; tensor model_model_layers_4_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_4_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271310272)))]; tensor var_1222_cast_fp16 = layer_norm(axes = var_1222_axes_0, epsilon = var_46_to_fp16, gamma = model_model_layers_4_post_attention_layernorm_weight_to_fp16, x = input_63_cast_fp16)[name = string("op_1222_cast_fp16")]; tensor var_1229 = const()[name = string("op_1229"), val = tensor([0, 2, 1])]; tensor input_65_axes_0 = const()[name = string("input_65_axes_0"), val = tensor([2])]; tensor var_1230 = transpose(perm = var_1229, x = var_1222_cast_fp16)[name = string("transpose_22")]; tensor input_65 = expand_dims(axes = input_65_axes_0, x = var_1230)[name = string("input_65")]; string input_67_pad_type_0 = const()[name = string("input_67_pad_type_0"), val = string("valid")]; tensor input_67_strides_0 = const()[name = string("input_67_strides_0"), val = tensor([1, 1])]; tensor input_67_pad_0 = const()[name = string("input_67_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_67_dilations_0 = const()[name = string("input_67_dilations_0"), val = tensor([1, 1])]; int32 input_67_groups_0 = const()[name = string("input_67_groups_0"), val = int32(1)]; tensor input_67 = conv(dilations = input_67_dilations_0, groups = input_67_groups_0, pad = input_67_pad_0, pad_type = input_67_pad_type_0, strides = input_67_strides_0, weight = model_model_layers_4_mlp_gate_proj_weight_palettized, x = input_65)[name = string("input_67")]; string up_states_9_pad_type_0 = const()[name = string("up_states_9_pad_type_0"), val = string("valid")]; tensor up_states_9_strides_0 = const()[name = string("up_states_9_strides_0"), val = tensor([1, 1])]; tensor up_states_9_pad_0 = const()[name = string("up_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_9_dilations_0 = const()[name = string("up_states_9_dilations_0"), val = tensor([1, 1])]; int32 up_states_9_groups_0 = const()[name = string("up_states_9_groups_0"), val = int32(1)]; tensor up_states_9 = conv(dilations = up_states_9_dilations_0, groups = up_states_9_groups_0, pad = up_states_9_pad_0, pad_type = up_states_9_pad_type_0, strides = up_states_9_strides_0, weight = model_model_layers_4_mlp_up_proj_weight_palettized, x = input_65)[name = string("up_states_9")]; tensor gate_states_9 = silu(x = input_67)[name = string("gate_states_9")]; tensor input_69 = mul(x = gate_states_9, y = up_states_9)[name = string("input_69")]; string hidden_states_39_pad_type_0 = const()[name = string("hidden_states_39_pad_type_0"), val = string("valid")]; tensor hidden_states_39_strides_0 = const()[name = string("hidden_states_39_strides_0"), val = tensor([1, 1])]; tensor hidden_states_39_pad_0 = const()[name = string("hidden_states_39_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_39_dilations_0 = const()[name = string("hidden_states_39_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_39_groups_0 = const()[name = string("hidden_states_39_groups_0"), val = int32(1)]; tensor hidden_states_39 = conv(dilations = hidden_states_39_dilations_0, groups = hidden_states_39_groups_0, pad = hidden_states_39_pad_0, pad_type = hidden_states_39_pad_type_0, strides = hidden_states_39_strides_0, weight = model_model_layers_4_mlp_down_proj_weight_palettized, x = input_69)[name = string("hidden_states_39")]; tensor var_1252_axes_0 = const()[name = string("op_1252_axes_0"), val = tensor([2])]; tensor var_1252 = squeeze(axes = var_1252_axes_0, x = hidden_states_39)[name = string("op_1252")]; tensor var_1253 = const()[name = string("op_1253"), val = tensor([0, 2, 1])]; tensor var_1254 = transpose(perm = var_1253, x = var_1252)[name = string("transpose_21")]; tensor hidden_states_41_cast_fp16 = add(x = hidden_states_37_cast_fp16, y = var_1254)[name = string("hidden_states_41_cast_fp16")]; tensor mean_21_axes_0 = const()[name = string("mean_21_axes_0"), val = tensor([-1])]; bool mean_21_keep_dims_0 = const()[name = string("mean_21_keep_dims_0"), val = bool(true)]; tensor mean_21_cast_fp16 = reduce_mean(axes = mean_21_axes_0, keep_dims = mean_21_keep_dims_0, x = hidden_states_41_cast_fp16)[name = string("mean_21_cast_fp16")]; tensor input_71_cast_fp16 = sub(x = hidden_states_41_cast_fp16, y = mean_21_cast_fp16)[name = string("input_71_cast_fp16")]; tensor var_1262_axes_0 = const()[name = string("op_1262_axes_0"), val = tensor([-1])]; tensor model_model_layers_5_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_5_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271314432)))]; tensor var_1262_cast_fp16 = layer_norm(axes = var_1262_axes_0, epsilon = var_46_to_fp16, gamma = model_model_layers_5_input_layernorm_weight_to_fp16, x = input_71_cast_fp16)[name = string("op_1262_cast_fp16")]; tensor var_1266 = const()[name = string("op_1266"), val = tensor([0, 2, 1])]; tensor var_1268_axes_0 = const()[name = string("op_1268_axes_0"), val = tensor([2])]; tensor var_1267 = transpose(perm = var_1266, x = var_1262_cast_fp16)[name = string("transpose_20")]; tensor var_1268 = expand_dims(axes = var_1268_axes_0, x = var_1267)[name = string("op_1268")]; string query_states_21_pad_type_0 = const()[name = string("query_states_21_pad_type_0"), val = string("valid")]; tensor query_states_21_strides_0 = const()[name = string("query_states_21_strides_0"), val = tensor([1, 1])]; tensor query_states_21_pad_0 = const()[name = string("query_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_21_dilations_0 = const()[name = string("query_states_21_dilations_0"), val = tensor([1, 1])]; int32 query_states_21_groups_0 = const()[name = string("query_states_21_groups_0"), val = int32(1)]; tensor query_states_21 = conv(dilations = query_states_21_dilations_0, groups = query_states_21_groups_0, pad = query_states_21_pad_0, pad_type = query_states_21_pad_type_0, strides = query_states_21_strides_0, weight = model_model_layers_5_self_attn_q_proj_weight_palettized, x = var_1268)[name = string("query_states_21")]; string key_states_31_pad_type_0 = const()[name = string("key_states_31_pad_type_0"), val = string("valid")]; tensor key_states_31_strides_0 = const()[name = string("key_states_31_strides_0"), val = tensor([1, 1])]; tensor key_states_31_pad_0 = const()[name = string("key_states_31_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_31_dilations_0 = const()[name = string("key_states_31_dilations_0"), val = tensor([1, 1])]; int32 key_states_31_groups_0 = const()[name = string("key_states_31_groups_0"), val = int32(1)]; tensor key_states_31 = conv(dilations = key_states_31_dilations_0, groups = key_states_31_groups_0, pad = key_states_31_pad_0, pad_type = key_states_31_pad_type_0, strides = key_states_31_strides_0, weight = model_model_layers_5_self_attn_k_proj_weight_palettized, x = var_1268)[name = string("key_states_31")]; string value_states_31_pad_type_0 = const()[name = string("value_states_31_pad_type_0"), val = string("valid")]; tensor value_states_31_strides_0 = const()[name = string("value_states_31_strides_0"), val = tensor([1, 1])]; tensor value_states_31_pad_0 = const()[name = string("value_states_31_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_31_dilations_0 = const()[name = string("value_states_31_dilations_0"), val = tensor([1, 1])]; int32 value_states_31_groups_0 = const()[name = string("value_states_31_groups_0"), val = int32(1)]; tensor value_states_31 = conv(dilations = value_states_31_dilations_0, groups = value_states_31_groups_0, pad = value_states_31_pad_0, pad_type = value_states_31_pad_type_0, strides = value_states_31_strides_0, weight = model_model_layers_5_self_attn_v_proj_weight_palettized, x = var_1268)[name = string("value_states_31")]; tensor var_1288 = const()[name = string("op_1288"), val = tensor([1, 32, 64, 64])]; tensor var_1289 = reshape(shape = var_1288, x = query_states_21)[name = string("op_1289")]; tensor var_1290 = const()[name = string("op_1290"), val = tensor([0, 1, 3, 2])]; tensor var_1292 = const()[name = string("op_1292"), val = tensor([1, 8, 64, 64])]; tensor var_1293 = reshape(shape = var_1292, x = key_states_31)[name = string("op_1293")]; tensor var_1294 = const()[name = string("op_1294"), val = tensor([0, 1, 3, 2])]; tensor var_1296 = const()[name = string("op_1296"), val = tensor([1, 8, 64, 64])]; tensor var_1297 = reshape(shape = var_1296, x = value_states_31)[name = string("op_1297")]; tensor var_1298 = const()[name = string("op_1298"), val = tensor([0, 1, 3, 2])]; tensor x1_21_begin_0 = const()[name = string("x1_21_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_21_end_0 = const()[name = string("x1_21_end_0"), val = tensor([1, 32, 64, 32])]; tensor x1_21_end_mask_0 = const()[name = string("x1_21_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_141 = transpose(perm = var_1290, x = var_1289)[name = string("transpose_19")]; tensor x1_21 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = x_141)[name = string("x1_21")]; tensor x2_21_begin_0 = const()[name = string("x2_21_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_21_end_0 = const()[name = string("x2_21_end_0"), val = tensor([1, 32, 64, 64])]; tensor x2_21_end_mask_0 = const()[name = string("x2_21_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_21 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = x_141)[name = string("x2_21")]; tensor var_1316 = mul(x = x1_21, y = cos_7)[name = string("op_1316")]; tensor var_1317 = mul(x = x2_21, y = sin_7)[name = string("op_1317")]; tensor var_1318 = sub(x = var_1316, y = var_1317)[name = string("op_1318")]; tensor var_1319 = mul(x = x2_21, y = cos_7)[name = string("op_1319")]; tensor var_1320 = mul(x = x1_21, y = sin_7)[name = string("op_1320")]; tensor var_1321 = add(x = var_1319, y = var_1320)[name = string("op_1321")]; bool rotated_21_interleave_0 = const()[name = string("rotated_21_interleave_0"), val = bool(false)]; tensor rotated_21 = concat(axis = var_44, interleave = rotated_21_interleave_0, values = (var_1318, var_1321))[name = string("rotated_21")]; tensor x1_23_begin_0 = const()[name = string("x1_23_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_23_end_0 = const()[name = string("x1_23_end_0"), val = tensor([1, 8, 64, 32])]; tensor x1_23_end_mask_0 = const()[name = string("x1_23_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_145 = transpose(perm = var_1294, x = var_1293)[name = string("transpose_18")]; tensor x1_23 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = x_145)[name = string("x1_23")]; tensor x2_23_begin_0 = const()[name = string("x2_23_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_23_end_0 = const()[name = string("x2_23_end_0"), val = tensor([1, 8, 64, 64])]; tensor x2_23_end_mask_0 = const()[name = string("x2_23_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_23 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = x_145)[name = string("x2_23")]; tensor var_1337 = mul(x = x1_23, y = cos_7)[name = string("op_1337")]; tensor var_1338 = mul(x = x2_23, y = sin_7)[name = string("op_1338")]; tensor var_1339 = sub(x = var_1337, y = var_1338)[name = string("op_1339")]; tensor var_1340 = mul(x = x2_23, y = cos_7)[name = string("op_1340")]; tensor var_1341 = mul(x = x1_23, y = sin_7)[name = string("op_1341")]; tensor var_1342 = add(x = var_1340, y = var_1341)[name = string("op_1342")]; bool rotated_23_interleave_0 = const()[name = string("rotated_23_interleave_0"), val = bool(false)]; tensor rotated_23 = concat(axis = var_44, interleave = rotated_23_interleave_0, values = (var_1339, var_1342))[name = string("rotated_23")]; tensor expand_dims_60 = const()[name = string("expand_dims_60"), val = tensor([5])]; tensor expand_dims_61 = const()[name = string("expand_dims_61"), val = tensor([0])]; tensor expand_dims_63 = const()[name = string("expand_dims_63"), val = tensor([0])]; tensor expand_dims_64 = const()[name = string("expand_dims_64"), val = tensor([6])]; int32 concat_92_axis_0 = const()[name = string("concat_92_axis_0"), val = int32(0)]; bool concat_92_interleave_0 = const()[name = string("concat_92_interleave_0"), val = bool(false)]; tensor concat_92 = concat(axis = concat_92_axis_0, interleave = concat_92_interleave_0, values = (expand_dims_60, expand_dims_61, current_pos, expand_dims_63))[name = string("concat_92")]; tensor concat_93_values1_0 = const()[name = string("concat_93_values1_0"), val = tensor([0])]; tensor concat_93_values3_0 = const()[name = string("concat_93_values3_0"), val = tensor([0])]; int32 concat_93_axis_0 = const()[name = string("concat_93_axis_0"), val = int32(0)]; bool concat_93_interleave_0 = const()[name = string("concat_93_interleave_0"), val = bool(false)]; tensor concat_93 = concat(axis = concat_93_axis_0, interleave = concat_93_interleave_0, values = (expand_dims_64, concat_93_values1_0, var_366, concat_93_values3_0))[name = string("concat_93")]; tensor model_model_kv_cache_0_internal_tensor_assign_11_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_92, begin_mask = model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0, end = concat_93, end_mask = model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_11_stride_0, update = rotated_23, x = coreml_update_state_25)[name = string("model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_26_write_state")]; tensor coreml_update_state_26 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_26")]; tensor expand_dims_66 = const()[name = string("expand_dims_66"), val = tensor([21])]; tensor expand_dims_67 = const()[name = string("expand_dims_67"), val = tensor([0])]; tensor expand_dims_69 = const()[name = string("expand_dims_69"), val = tensor([0])]; tensor expand_dims_70 = const()[name = string("expand_dims_70"), val = tensor([22])]; int32 concat_96_axis_0 = const()[name = string("concat_96_axis_0"), val = int32(0)]; bool concat_96_interleave_0 = const()[name = string("concat_96_interleave_0"), val = bool(false)]; tensor concat_96 = concat(axis = concat_96_axis_0, interleave = concat_96_interleave_0, values = (expand_dims_66, expand_dims_67, current_pos, expand_dims_69))[name = string("concat_96")]; tensor concat_97_values1_0 = const()[name = string("concat_97_values1_0"), val = tensor([0])]; tensor concat_97_values3_0 = const()[name = string("concat_97_values3_0"), val = tensor([0])]; int32 concat_97_axis_0 = const()[name = string("concat_97_axis_0"), val = int32(0)]; bool concat_97_interleave_0 = const()[name = string("concat_97_interleave_0"), val = bool(false)]; tensor concat_97 = concat(axis = concat_97_axis_0, interleave = concat_97_interleave_0, values = (expand_dims_70, concat_97_values1_0, var_366, concat_97_values3_0))[name = string("concat_97")]; tensor model_model_kv_cache_0_internal_tensor_assign_12_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_33 = transpose(perm = var_1298, x = var_1297)[name = string("transpose_17")]; tensor model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_96, begin_mask = model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0, end = concat_97, end_mask = model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_12_stride_0, update = value_states_33, x = coreml_update_state_26)[name = string("model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_27_write_state")]; tensor coreml_update_state_27 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_27")]; tensor var_1365_begin_0 = const()[name = string("op_1365_begin_0"), val = tensor([5, 0, 0, 0])]; tensor var_1365_end_0 = const()[name = string("op_1365_end_0"), val = tensor([6, 8, 2048, 64])]; tensor var_1365_end_mask_0 = const()[name = string("op_1365_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1365_cast_fp16 = slice_by_index(begin = var_1365_begin_0, end = var_1365_end_0, end_mask = var_1365_end_mask_0, x = coreml_update_state_27)[name = string("op_1365_cast_fp16")]; tensor K_layer_cache_11_axes_0 = const()[name = string("K_layer_cache_11_axes_0"), val = tensor([0])]; tensor K_layer_cache_11_cast_fp16 = squeeze(axes = K_layer_cache_11_axes_0, x = var_1365_cast_fp16)[name = string("K_layer_cache_11_cast_fp16")]; tensor var_1367_begin_0 = const()[name = string("op_1367_begin_0"), val = tensor([21, 0, 0, 0])]; tensor var_1367_end_0 = const()[name = string("op_1367_end_0"), val = tensor([22, 8, 2048, 64])]; tensor var_1367_end_mask_0 = const()[name = string("op_1367_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1367_cast_fp16 = slice_by_index(begin = var_1367_begin_0, end = var_1367_end_0, end_mask = var_1367_end_mask_0, x = coreml_update_state_27)[name = string("op_1367_cast_fp16")]; tensor V_layer_cache_11_axes_0 = const()[name = string("V_layer_cache_11_axes_0"), val = tensor([0])]; tensor V_layer_cache_11_cast_fp16 = squeeze(axes = V_layer_cache_11_axes_0, x = var_1367_cast_fp16)[name = string("V_layer_cache_11_cast_fp16")]; tensor x_151_axes_0 = const()[name = string("x_151_axes_0"), val = tensor([1])]; tensor x_151_cast_fp16 = expand_dims(axes = x_151_axes_0, x = K_layer_cache_11_cast_fp16)[name = string("x_151_cast_fp16")]; tensor var_1376 = const()[name = string("op_1376"), val = tensor([1, 4, 1, 1])]; tensor x_153_cast_fp16 = tile(reps = var_1376, x = x_151_cast_fp16)[name = string("x_153_cast_fp16")]; tensor var_1380 = const()[name = string("op_1380"), val = tensor([1, -1, 2048, 64])]; tensor var_1381_cast_fp16 = reshape(shape = var_1380, x = x_153_cast_fp16)[name = string("op_1381_cast_fp16")]; tensor x_157_axes_0 = const()[name = string("x_157_axes_0"), val = tensor([1])]; tensor x_157_cast_fp16 = expand_dims(axes = x_157_axes_0, x = V_layer_cache_11_cast_fp16)[name = string("x_157_cast_fp16")]; tensor var_1383 = const()[name = string("op_1383"), val = tensor([1, 4, 1, 1])]; tensor x_159_cast_fp16 = tile(reps = var_1383, x = x_157_cast_fp16)[name = string("x_159_cast_fp16")]; bool var_1390_transpose_x_0 = const()[name = string("op_1390_transpose_x_0"), val = bool(false)]; bool var_1390_transpose_y_0 = const()[name = string("op_1390_transpose_y_0"), val = bool(true)]; tensor var_1390_cast_fp16 = matmul(transpose_x = var_1390_transpose_x_0, transpose_y = var_1390_transpose_y_0, x = rotated_21, y = var_1381_cast_fp16)[name = string("op_1390_cast_fp16")]; fp16 var_1391_to_fp16 = const()[name = string("op_1391_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_11_cast_fp16 = mul(x = var_1390_cast_fp16, y = var_1391_to_fp16)[name = string("attn_weights_11_cast_fp16")]; tensor x_161_cast_fp16 = add(x = attn_weights_11_cast_fp16, y = causal_mask)[name = string("x_161_cast_fp16")]; tensor reduce_max_5_axes_0 = const()[name = string("reduce_max_5_axes_0"), val = tensor([-1])]; bool reduce_max_5_keep_dims_0 = const()[name = string("reduce_max_5_keep_dims_0"), val = bool(true)]; tensor reduce_max_5_cast_fp16 = reduce_max(axes = reduce_max_5_axes_0, keep_dims = reduce_max_5_keep_dims_0, x = x_161_cast_fp16)[name = string("reduce_max_5_cast_fp16")]; tensor x_163_cast_fp16 = sub(x = x_161_cast_fp16, y = reduce_max_5_cast_fp16)[name = string("x_163_cast_fp16")]; tensor exp_x_11_cast_fp16 = exp(x = x_163_cast_fp16)[name = string("exp_x_11_cast_fp16")]; tensor var_1402_axes_0 = const()[name = string("op_1402_axes_0"), val = tensor([-1])]; bool var_1402_keep_dims_0 = const()[name = string("op_1402_keep_dims_0"), val = bool(true)]; tensor var_1402_cast_fp16 = reduce_sum(axes = var_1402_axes_0, keep_dims = var_1402_keep_dims_0, x = exp_x_11_cast_fp16)[name = string("op_1402_cast_fp16")]; tensor var_1403_cast_fp16 = real_div(x = exp_x_11_cast_fp16, y = var_1402_cast_fp16)[name = string("op_1403_cast_fp16")]; tensor concat_102 = const()[name = string("concat_102"), val = tensor([32, 64, 2048])]; tensor reshape_15_cast_fp16 = reshape(shape = concat_102, x = var_1403_cast_fp16)[name = string("reshape_15_cast_fp16")]; tensor concat_103 = const()[name = string("concat_103"), val = tensor([32, 2048, 64])]; tensor reshape_16_cast_fp16 = reshape(shape = concat_103, x = x_159_cast_fp16)[name = string("reshape_16_cast_fp16")]; bool matmul_5_transpose_x_0 = const()[name = string("matmul_5_transpose_x_0"), val = bool(false)]; bool matmul_5_transpose_y_0 = const()[name = string("matmul_5_transpose_y_0"), val = bool(false)]; tensor matmul_5_cast_fp16 = matmul(transpose_x = matmul_5_transpose_x_0, transpose_y = matmul_5_transpose_y_0, x = reshape_15_cast_fp16, y = reshape_16_cast_fp16)[name = string("matmul_5_cast_fp16")]; tensor concat_107 = const()[name = string("concat_107"), val = tensor([1, 32, 64, 64])]; tensor reshape_17_cast_fp16 = reshape(shape = concat_107, x = matmul_5_cast_fp16)[name = string("reshape_17_cast_fp16")]; tensor var_1406_perm_0 = const()[name = string("op_1406_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1408 = const()[name = string("op_1408"), val = tensor([1, 64, 2048])]; tensor var_1406_cast_fp16 = transpose(perm = var_1406_perm_0, x = reshape_17_cast_fp16)[name = string("transpose_16")]; tensor input_75_cast_fp16 = reshape(shape = var_1408, x = var_1406_cast_fp16)[name = string("input_75_cast_fp16")]; tensor model_model_layers_5_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271318592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273415808))))[name = string("model_model_layers_5_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_5_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_5_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_75_cast_fp16)[name = string("linear_5_cast_fp16")]; tensor hidden_states_45_cast_fp16 = add(x = hidden_states_41_cast_fp16, y = linear_5_cast_fp16)[name = string("hidden_states_45_cast_fp16")]; tensor mean_23_axes_0 = const()[name = string("mean_23_axes_0"), val = tensor([-1])]; bool mean_23_keep_dims_0 = const()[name = string("mean_23_keep_dims_0"), val = bool(true)]; tensor mean_23_cast_fp16 = reduce_mean(axes = mean_23_axes_0, keep_dims = mean_23_keep_dims_0, x = hidden_states_45_cast_fp16)[name = string("mean_23_cast_fp16")]; tensor input_77_cast_fp16 = sub(x = hidden_states_45_cast_fp16, y = mean_23_cast_fp16)[name = string("input_77_cast_fp16")]; tensor var_1419_axes_0 = const()[name = string("op_1419_axes_0"), val = tensor([-1])]; tensor model_model_layers_5_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_5_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273424064)))]; tensor var_1419_cast_fp16 = layer_norm(axes = var_1419_axes_0, epsilon = var_46_to_fp16, gamma = model_model_layers_5_post_attention_layernorm_weight_to_fp16, x = input_77_cast_fp16)[name = string("op_1419_cast_fp16")]; tensor var_1426 = const()[name = string("op_1426"), val = tensor([0, 2, 1])]; tensor input_79_axes_0 = const()[name = string("input_79_axes_0"), val = tensor([2])]; tensor var_1427 = transpose(perm = var_1426, x = var_1419_cast_fp16)[name = string("transpose_15")]; tensor input_79 = expand_dims(axes = input_79_axes_0, x = var_1427)[name = string("input_79")]; string input_81_pad_type_0 = const()[name = string("input_81_pad_type_0"), val = string("valid")]; tensor input_81_strides_0 = const()[name = string("input_81_strides_0"), val = tensor([1, 1])]; tensor input_81_pad_0 = const()[name = string("input_81_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_81_dilations_0 = const()[name = string("input_81_dilations_0"), val = tensor([1, 1])]; int32 input_81_groups_0 = const()[name = string("input_81_groups_0"), val = int32(1)]; tensor input_81 = conv(dilations = input_81_dilations_0, groups = input_81_groups_0, pad = input_81_pad_0, pad_type = input_81_pad_type_0, strides = input_81_strides_0, weight = model_model_layers_5_mlp_gate_proj_weight_palettized, x = input_79)[name = string("input_81")]; string up_states_11_pad_type_0 = const()[name = string("up_states_11_pad_type_0"), val = string("valid")]; tensor up_states_11_strides_0 = const()[name = string("up_states_11_strides_0"), val = tensor([1, 1])]; tensor up_states_11_pad_0 = const()[name = string("up_states_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_11_dilations_0 = const()[name = string("up_states_11_dilations_0"), val = tensor([1, 1])]; int32 up_states_11_groups_0 = const()[name = string("up_states_11_groups_0"), val = int32(1)]; tensor up_states_11 = conv(dilations = up_states_11_dilations_0, groups = up_states_11_groups_0, pad = up_states_11_pad_0, pad_type = up_states_11_pad_type_0, strides = up_states_11_strides_0, weight = model_model_layers_5_mlp_up_proj_weight_palettized, x = input_79)[name = string("up_states_11")]; tensor gate_states_11 = silu(x = input_81)[name = string("gate_states_11")]; tensor input_83 = mul(x = gate_states_11, y = up_states_11)[name = string("input_83")]; string hidden_states_47_pad_type_0 = const()[name = string("hidden_states_47_pad_type_0"), val = string("valid")]; tensor hidden_states_47_strides_0 = const()[name = string("hidden_states_47_strides_0"), val = tensor([1, 1])]; tensor hidden_states_47_pad_0 = const()[name = string("hidden_states_47_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_47_dilations_0 = const()[name = string("hidden_states_47_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_47_groups_0 = const()[name = string("hidden_states_47_groups_0"), val = int32(1)]; tensor hidden_states_47 = conv(dilations = hidden_states_47_dilations_0, groups = hidden_states_47_groups_0, pad = hidden_states_47_pad_0, pad_type = hidden_states_47_pad_type_0, strides = hidden_states_47_strides_0, weight = model_model_layers_5_mlp_down_proj_weight_palettized, x = input_83)[name = string("hidden_states_47")]; tensor var_1449_axes_0 = const()[name = string("op_1449_axes_0"), val = tensor([2])]; tensor var_1449 = squeeze(axes = var_1449_axes_0, x = hidden_states_47)[name = string("op_1449")]; tensor var_1450 = const()[name = string("op_1450"), val = tensor([0, 2, 1])]; tensor var_1451 = transpose(perm = var_1450, x = var_1449)[name = string("transpose_14")]; tensor hidden_states_49_cast_fp16 = add(x = hidden_states_45_cast_fp16, y = var_1451)[name = string("hidden_states_49_cast_fp16")]; tensor mean_25_axes_0 = const()[name = string("mean_25_axes_0"), val = tensor([-1])]; bool mean_25_keep_dims_0 = const()[name = string("mean_25_keep_dims_0"), val = bool(true)]; tensor mean_25_cast_fp16 = reduce_mean(axes = mean_25_axes_0, keep_dims = mean_25_keep_dims_0, x = hidden_states_49_cast_fp16)[name = string("mean_25_cast_fp16")]; tensor input_85_cast_fp16 = sub(x = hidden_states_49_cast_fp16, y = mean_25_cast_fp16)[name = string("input_85_cast_fp16")]; tensor var_1459_axes_0 = const()[name = string("op_1459_axes_0"), val = tensor([-1])]; tensor model_model_layers_6_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_6_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273428224)))]; tensor var_1459_cast_fp16 = layer_norm(axes = var_1459_axes_0, epsilon = var_46_to_fp16, gamma = model_model_layers_6_input_layernorm_weight_to_fp16, x = input_85_cast_fp16)[name = string("op_1459_cast_fp16")]; tensor var_1463 = const()[name = string("op_1463"), val = tensor([0, 2, 1])]; tensor var_1465_axes_0 = const()[name = string("op_1465_axes_0"), val = tensor([2])]; tensor var_1464 = transpose(perm = var_1463, x = var_1459_cast_fp16)[name = string("transpose_13")]; tensor var_1465 = expand_dims(axes = var_1465_axes_0, x = var_1464)[name = string("op_1465")]; string query_states_25_pad_type_0 = const()[name = string("query_states_25_pad_type_0"), val = string("valid")]; tensor query_states_25_strides_0 = const()[name = string("query_states_25_strides_0"), val = tensor([1, 1])]; tensor query_states_25_pad_0 = const()[name = string("query_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_25_dilations_0 = const()[name = string("query_states_25_dilations_0"), val = tensor([1, 1])]; int32 query_states_25_groups_0 = const()[name = string("query_states_25_groups_0"), val = int32(1)]; tensor query_states_25 = conv(dilations = query_states_25_dilations_0, groups = query_states_25_groups_0, pad = query_states_25_pad_0, pad_type = query_states_25_pad_type_0, strides = query_states_25_strides_0, weight = model_model_layers_6_self_attn_q_proj_weight_palettized, x = var_1465)[name = string("query_states_25")]; string key_states_37_pad_type_0 = const()[name = string("key_states_37_pad_type_0"), val = string("valid")]; tensor key_states_37_strides_0 = const()[name = string("key_states_37_strides_0"), val = tensor([1, 1])]; tensor key_states_37_pad_0 = const()[name = string("key_states_37_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_37_dilations_0 = const()[name = string("key_states_37_dilations_0"), val = tensor([1, 1])]; int32 key_states_37_groups_0 = const()[name = string("key_states_37_groups_0"), val = int32(1)]; tensor key_states_37 = conv(dilations = key_states_37_dilations_0, groups = key_states_37_groups_0, pad = key_states_37_pad_0, pad_type = key_states_37_pad_type_0, strides = key_states_37_strides_0, weight = model_model_layers_6_self_attn_k_proj_weight_palettized, x = var_1465)[name = string("key_states_37")]; string value_states_37_pad_type_0 = const()[name = string("value_states_37_pad_type_0"), val = string("valid")]; tensor value_states_37_strides_0 = const()[name = string("value_states_37_strides_0"), val = tensor([1, 1])]; tensor value_states_37_pad_0 = const()[name = string("value_states_37_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_37_dilations_0 = const()[name = string("value_states_37_dilations_0"), val = tensor([1, 1])]; int32 value_states_37_groups_0 = const()[name = string("value_states_37_groups_0"), val = int32(1)]; tensor value_states_37 = conv(dilations = value_states_37_dilations_0, groups = value_states_37_groups_0, pad = value_states_37_pad_0, pad_type = value_states_37_pad_type_0, strides = value_states_37_strides_0, weight = model_model_layers_6_self_attn_v_proj_weight_palettized, x = var_1465)[name = string("value_states_37")]; tensor var_1485 = const()[name = string("op_1485"), val = tensor([1, 32, 64, 64])]; tensor var_1486 = reshape(shape = var_1485, x = query_states_25)[name = string("op_1486")]; tensor var_1487 = const()[name = string("op_1487"), val = tensor([0, 1, 3, 2])]; tensor var_1489 = const()[name = string("op_1489"), val = tensor([1, 8, 64, 64])]; tensor var_1490 = reshape(shape = var_1489, x = key_states_37)[name = string("op_1490")]; tensor var_1491 = const()[name = string("op_1491"), val = tensor([0, 1, 3, 2])]; tensor var_1493 = const()[name = string("op_1493"), val = tensor([1, 8, 64, 64])]; tensor var_1494 = reshape(shape = var_1493, x = value_states_37)[name = string("op_1494")]; tensor var_1495 = const()[name = string("op_1495"), val = tensor([0, 1, 3, 2])]; tensor x1_25_begin_0 = const()[name = string("x1_25_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_25_end_0 = const()[name = string("x1_25_end_0"), val = tensor([1, 32, 64, 32])]; tensor x1_25_end_mask_0 = const()[name = string("x1_25_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_169 = transpose(perm = var_1487, x = var_1486)[name = string("transpose_12")]; tensor x1_25 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = x_169)[name = string("x1_25")]; tensor x2_25_begin_0 = const()[name = string("x2_25_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_25_end_0 = const()[name = string("x2_25_end_0"), val = tensor([1, 32, 64, 64])]; tensor x2_25_end_mask_0 = const()[name = string("x2_25_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_25 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = x_169)[name = string("x2_25")]; tensor var_1513 = mul(x = x1_25, y = cos_7)[name = string("op_1513")]; tensor var_1514 = mul(x = x2_25, y = sin_7)[name = string("op_1514")]; tensor var_1515 = sub(x = var_1513, y = var_1514)[name = string("op_1515")]; tensor var_1516 = mul(x = x2_25, y = cos_7)[name = string("op_1516")]; tensor var_1517 = mul(x = x1_25, y = sin_7)[name = string("op_1517")]; tensor var_1518 = add(x = var_1516, y = var_1517)[name = string("op_1518")]; bool rotated_25_interleave_0 = const()[name = string("rotated_25_interleave_0"), val = bool(false)]; tensor rotated_25 = concat(axis = var_44, interleave = rotated_25_interleave_0, values = (var_1515, var_1518))[name = string("rotated_25")]; tensor x1_27_begin_0 = const()[name = string("x1_27_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_27_end_0 = const()[name = string("x1_27_end_0"), val = tensor([1, 8, 64, 32])]; tensor x1_27_end_mask_0 = const()[name = string("x1_27_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_173 = transpose(perm = var_1491, x = var_1490)[name = string("transpose_11")]; tensor x1_27 = slice_by_index(begin = x1_27_begin_0, end = x1_27_end_0, end_mask = x1_27_end_mask_0, x = x_173)[name = string("x1_27")]; tensor x2_27_begin_0 = const()[name = string("x2_27_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_27_end_0 = const()[name = string("x2_27_end_0"), val = tensor([1, 8, 64, 64])]; tensor x2_27_end_mask_0 = const()[name = string("x2_27_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_27 = slice_by_index(begin = x2_27_begin_0, end = x2_27_end_0, end_mask = x2_27_end_mask_0, x = x_173)[name = string("x2_27")]; tensor var_1534 = mul(x = x1_27, y = cos_7)[name = string("op_1534")]; tensor var_1535 = mul(x = x2_27, y = sin_7)[name = string("op_1535")]; tensor var_1536 = sub(x = var_1534, y = var_1535)[name = string("op_1536")]; tensor var_1537 = mul(x = x2_27, y = cos_7)[name = string("op_1537")]; tensor var_1538 = mul(x = x1_27, y = sin_7)[name = string("op_1538")]; tensor var_1539 = add(x = var_1537, y = var_1538)[name = string("op_1539")]; bool rotated_27_interleave_0 = const()[name = string("rotated_27_interleave_0"), val = bool(false)]; tensor rotated_27 = concat(axis = var_44, interleave = rotated_27_interleave_0, values = (var_1536, var_1539))[name = string("rotated_27")]; tensor expand_dims_72 = const()[name = string("expand_dims_72"), val = tensor([6])]; tensor expand_dims_73 = const()[name = string("expand_dims_73"), val = tensor([0])]; tensor expand_dims_75 = const()[name = string("expand_dims_75"), val = tensor([0])]; tensor expand_dims_76 = const()[name = string("expand_dims_76"), val = tensor([7])]; int32 concat_110_axis_0 = const()[name = string("concat_110_axis_0"), val = int32(0)]; bool concat_110_interleave_0 = const()[name = string("concat_110_interleave_0"), val = bool(false)]; tensor concat_110 = concat(axis = concat_110_axis_0, interleave = concat_110_interleave_0, values = (expand_dims_72, expand_dims_73, current_pos, expand_dims_75))[name = string("concat_110")]; tensor concat_111_values1_0 = const()[name = string("concat_111_values1_0"), val = tensor([0])]; tensor concat_111_values3_0 = const()[name = string("concat_111_values3_0"), val = tensor([0])]; int32 concat_111_axis_0 = const()[name = string("concat_111_axis_0"), val = int32(0)]; bool concat_111_interleave_0 = const()[name = string("concat_111_interleave_0"), val = bool(false)]; tensor concat_111 = concat(axis = concat_111_axis_0, interleave = concat_111_interleave_0, values = (expand_dims_76, concat_111_values1_0, var_366, concat_111_values3_0))[name = string("concat_111")]; tensor model_model_kv_cache_0_internal_tensor_assign_13_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_110, begin_mask = model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0, end = concat_111, end_mask = model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_13_stride_0, update = rotated_27, x = coreml_update_state_27)[name = string("model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_28_write_state")]; tensor coreml_update_state_28 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_28")]; tensor expand_dims_78 = const()[name = string("expand_dims_78"), val = tensor([22])]; tensor expand_dims_79 = const()[name = string("expand_dims_79"), val = tensor([0])]; tensor expand_dims_81 = const()[name = string("expand_dims_81"), val = tensor([0])]; tensor expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor([23])]; int32 concat_114_axis_0 = const()[name = string("concat_114_axis_0"), val = int32(0)]; bool concat_114_interleave_0 = const()[name = string("concat_114_interleave_0"), val = bool(false)]; tensor concat_114 = concat(axis = concat_114_axis_0, interleave = concat_114_interleave_0, values = (expand_dims_78, expand_dims_79, current_pos, expand_dims_81))[name = string("concat_114")]; tensor concat_115_values1_0 = const()[name = string("concat_115_values1_0"), val = tensor([0])]; tensor concat_115_values3_0 = const()[name = string("concat_115_values3_0"), val = tensor([0])]; int32 concat_115_axis_0 = const()[name = string("concat_115_axis_0"), val = int32(0)]; bool concat_115_interleave_0 = const()[name = string("concat_115_interleave_0"), val = bool(false)]; tensor concat_115 = concat(axis = concat_115_axis_0, interleave = concat_115_interleave_0, values = (expand_dims_82, concat_115_values1_0, var_366, concat_115_values3_0))[name = string("concat_115")]; tensor model_model_kv_cache_0_internal_tensor_assign_14_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_39 = transpose(perm = var_1495, x = var_1494)[name = string("transpose_10")]; tensor model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_114, begin_mask = model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0, end = concat_115, end_mask = model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_14_stride_0, update = value_states_39, x = coreml_update_state_28)[name = string("model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_29_write_state")]; tensor coreml_update_state_29 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_29")]; tensor var_1562_begin_0 = const()[name = string("op_1562_begin_0"), val = tensor([6, 0, 0, 0])]; tensor var_1562_end_0 = const()[name = string("op_1562_end_0"), val = tensor([7, 8, 2048, 64])]; tensor var_1562_end_mask_0 = const()[name = string("op_1562_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1562_cast_fp16 = slice_by_index(begin = var_1562_begin_0, end = var_1562_end_0, end_mask = var_1562_end_mask_0, x = coreml_update_state_29)[name = string("op_1562_cast_fp16")]; tensor K_layer_cache_13_axes_0 = const()[name = string("K_layer_cache_13_axes_0"), val = tensor([0])]; tensor K_layer_cache_13_cast_fp16 = squeeze(axes = K_layer_cache_13_axes_0, x = var_1562_cast_fp16)[name = string("K_layer_cache_13_cast_fp16")]; tensor var_1564_begin_0 = const()[name = string("op_1564_begin_0"), val = tensor([22, 0, 0, 0])]; tensor var_1564_end_0 = const()[name = string("op_1564_end_0"), val = tensor([23, 8, 2048, 64])]; tensor var_1564_end_mask_0 = const()[name = string("op_1564_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1564_cast_fp16 = slice_by_index(begin = var_1564_begin_0, end = var_1564_end_0, end_mask = var_1564_end_mask_0, x = coreml_update_state_29)[name = string("op_1564_cast_fp16")]; tensor V_layer_cache_13_axes_0 = const()[name = string("V_layer_cache_13_axes_0"), val = tensor([0])]; tensor V_layer_cache_13_cast_fp16 = squeeze(axes = V_layer_cache_13_axes_0, x = var_1564_cast_fp16)[name = string("V_layer_cache_13_cast_fp16")]; tensor x_179_axes_0 = const()[name = string("x_179_axes_0"), val = tensor([1])]; tensor x_179_cast_fp16 = expand_dims(axes = x_179_axes_0, x = K_layer_cache_13_cast_fp16)[name = string("x_179_cast_fp16")]; tensor var_1573 = const()[name = string("op_1573"), val = tensor([1, 4, 1, 1])]; tensor x_181_cast_fp16 = tile(reps = var_1573, x = x_179_cast_fp16)[name = string("x_181_cast_fp16")]; tensor var_1577 = const()[name = string("op_1577"), val = tensor([1, -1, 2048, 64])]; tensor var_1578_cast_fp16 = reshape(shape = var_1577, x = x_181_cast_fp16)[name = string("op_1578_cast_fp16")]; tensor x_185_axes_0 = const()[name = string("x_185_axes_0"), val = tensor([1])]; tensor x_185_cast_fp16 = expand_dims(axes = x_185_axes_0, x = V_layer_cache_13_cast_fp16)[name = string("x_185_cast_fp16")]; tensor var_1580 = const()[name = string("op_1580"), val = tensor([1, 4, 1, 1])]; tensor x_187_cast_fp16 = tile(reps = var_1580, x = x_185_cast_fp16)[name = string("x_187_cast_fp16")]; bool var_1587_transpose_x_0 = const()[name = string("op_1587_transpose_x_0"), val = bool(false)]; bool var_1587_transpose_y_0 = const()[name = string("op_1587_transpose_y_0"), val = bool(true)]; tensor var_1587_cast_fp16 = matmul(transpose_x = var_1587_transpose_x_0, transpose_y = var_1587_transpose_y_0, x = rotated_25, y = var_1578_cast_fp16)[name = string("op_1587_cast_fp16")]; fp16 var_1588_to_fp16 = const()[name = string("op_1588_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_13_cast_fp16 = mul(x = var_1587_cast_fp16, y = var_1588_to_fp16)[name = string("attn_weights_13_cast_fp16")]; tensor x_189_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = causal_mask)[name = string("x_189_cast_fp16")]; tensor reduce_max_6_axes_0 = const()[name = string("reduce_max_6_axes_0"), val = tensor([-1])]; bool reduce_max_6_keep_dims_0 = const()[name = string("reduce_max_6_keep_dims_0"), val = bool(true)]; tensor reduce_max_6_cast_fp16 = reduce_max(axes = reduce_max_6_axes_0, keep_dims = reduce_max_6_keep_dims_0, x = x_189_cast_fp16)[name = string("reduce_max_6_cast_fp16")]; tensor x_191_cast_fp16 = sub(x = x_189_cast_fp16, y = reduce_max_6_cast_fp16)[name = string("x_191_cast_fp16")]; tensor exp_x_13_cast_fp16 = exp(x = x_191_cast_fp16)[name = string("exp_x_13_cast_fp16")]; tensor var_1599_axes_0 = const()[name = string("op_1599_axes_0"), val = tensor([-1])]; bool var_1599_keep_dims_0 = const()[name = string("op_1599_keep_dims_0"), val = bool(true)]; tensor var_1599_cast_fp16 = reduce_sum(axes = var_1599_axes_0, keep_dims = var_1599_keep_dims_0, x = exp_x_13_cast_fp16)[name = string("op_1599_cast_fp16")]; tensor var_1600_cast_fp16 = real_div(x = exp_x_13_cast_fp16, y = var_1599_cast_fp16)[name = string("op_1600_cast_fp16")]; tensor concat_120 = const()[name = string("concat_120"), val = tensor([32, 64, 2048])]; tensor reshape_18_cast_fp16 = reshape(shape = concat_120, x = var_1600_cast_fp16)[name = string("reshape_18_cast_fp16")]; tensor concat_121 = const()[name = string("concat_121"), val = tensor([32, 2048, 64])]; tensor reshape_19_cast_fp16 = reshape(shape = concat_121, x = x_187_cast_fp16)[name = string("reshape_19_cast_fp16")]; bool matmul_6_transpose_x_0 = const()[name = string("matmul_6_transpose_x_0"), val = bool(false)]; bool matmul_6_transpose_y_0 = const()[name = string("matmul_6_transpose_y_0"), val = bool(false)]; tensor matmul_6_cast_fp16 = matmul(transpose_x = matmul_6_transpose_x_0, transpose_y = matmul_6_transpose_y_0, x = reshape_18_cast_fp16, y = reshape_19_cast_fp16)[name = string("matmul_6_cast_fp16")]; tensor concat_125 = const()[name = string("concat_125"), val = tensor([1, 32, 64, 64])]; tensor reshape_20_cast_fp16 = reshape(shape = concat_125, x = matmul_6_cast_fp16)[name = string("reshape_20_cast_fp16")]; tensor var_1603_perm_0 = const()[name = string("op_1603_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1605 = const()[name = string("op_1605"), val = tensor([1, 64, 2048])]; tensor var_1603_cast_fp16 = transpose(perm = var_1603_perm_0, x = reshape_20_cast_fp16)[name = string("transpose_9")]; tensor input_89_cast_fp16 = reshape(shape = var_1605, x = var_1603_cast_fp16)[name = string("input_89_cast_fp16")]; tensor model_model_layers_6_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273432384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275529600))))[name = string("model_model_layers_6_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_6_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_6_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_89_cast_fp16)[name = string("linear_6_cast_fp16")]; tensor hidden_states_53_cast_fp16 = add(x = hidden_states_49_cast_fp16, y = linear_6_cast_fp16)[name = string("hidden_states_53_cast_fp16")]; tensor mean_27_axes_0 = const()[name = string("mean_27_axes_0"), val = tensor([-1])]; bool mean_27_keep_dims_0 = const()[name = string("mean_27_keep_dims_0"), val = bool(true)]; tensor mean_27_cast_fp16 = reduce_mean(axes = mean_27_axes_0, keep_dims = mean_27_keep_dims_0, x = hidden_states_53_cast_fp16)[name = string("mean_27_cast_fp16")]; tensor input_91_cast_fp16 = sub(x = hidden_states_53_cast_fp16, y = mean_27_cast_fp16)[name = string("input_91_cast_fp16")]; tensor var_1616_axes_0 = const()[name = string("op_1616_axes_0"), val = tensor([-1])]; tensor model_model_layers_6_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_6_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275537856)))]; tensor var_1616_cast_fp16 = layer_norm(axes = var_1616_axes_0, epsilon = var_46_to_fp16, gamma = model_model_layers_6_post_attention_layernorm_weight_to_fp16, x = input_91_cast_fp16)[name = string("op_1616_cast_fp16")]; tensor var_1623 = const()[name = string("op_1623"), val = tensor([0, 2, 1])]; tensor input_93_axes_0 = const()[name = string("input_93_axes_0"), val = tensor([2])]; tensor var_1624 = transpose(perm = var_1623, x = var_1616_cast_fp16)[name = string("transpose_8")]; tensor input_93 = expand_dims(axes = input_93_axes_0, x = var_1624)[name = string("input_93")]; string input_95_pad_type_0 = const()[name = string("input_95_pad_type_0"), val = string("valid")]; tensor input_95_strides_0 = const()[name = string("input_95_strides_0"), val = tensor([1, 1])]; tensor input_95_pad_0 = const()[name = string("input_95_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_95_dilations_0 = const()[name = string("input_95_dilations_0"), val = tensor([1, 1])]; int32 input_95_groups_0 = const()[name = string("input_95_groups_0"), val = int32(1)]; tensor input_95 = conv(dilations = input_95_dilations_0, groups = input_95_groups_0, pad = input_95_pad_0, pad_type = input_95_pad_type_0, strides = input_95_strides_0, weight = model_model_layers_6_mlp_gate_proj_weight_palettized, x = input_93)[name = string("input_95")]; string up_states_13_pad_type_0 = const()[name = string("up_states_13_pad_type_0"), val = string("valid")]; tensor up_states_13_strides_0 = const()[name = string("up_states_13_strides_0"), val = tensor([1, 1])]; tensor up_states_13_pad_0 = const()[name = string("up_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_13_dilations_0 = const()[name = string("up_states_13_dilations_0"), val = tensor([1, 1])]; int32 up_states_13_groups_0 = const()[name = string("up_states_13_groups_0"), val = int32(1)]; tensor up_states_13 = conv(dilations = up_states_13_dilations_0, groups = up_states_13_groups_0, pad = up_states_13_pad_0, pad_type = up_states_13_pad_type_0, strides = up_states_13_strides_0, weight = model_model_layers_6_mlp_up_proj_weight_palettized, x = input_93)[name = string("up_states_13")]; tensor gate_states_13 = silu(x = input_95)[name = string("gate_states_13")]; tensor input_97 = mul(x = gate_states_13, y = up_states_13)[name = string("input_97")]; string hidden_states_55_pad_type_0 = const()[name = string("hidden_states_55_pad_type_0"), val = string("valid")]; tensor hidden_states_55_strides_0 = const()[name = string("hidden_states_55_strides_0"), val = tensor([1, 1])]; tensor hidden_states_55_pad_0 = const()[name = string("hidden_states_55_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_55_dilations_0 = const()[name = string("hidden_states_55_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_55_groups_0 = const()[name = string("hidden_states_55_groups_0"), val = int32(1)]; tensor hidden_states_55 = conv(dilations = hidden_states_55_dilations_0, groups = hidden_states_55_groups_0, pad = hidden_states_55_pad_0, pad_type = hidden_states_55_pad_type_0, strides = hidden_states_55_strides_0, weight = model_model_layers_6_mlp_down_proj_weight_palettized, x = input_97)[name = string("hidden_states_55")]; tensor var_1646_axes_0 = const()[name = string("op_1646_axes_0"), val = tensor([2])]; tensor var_1646 = squeeze(axes = var_1646_axes_0, x = hidden_states_55)[name = string("op_1646")]; tensor var_1647 = const()[name = string("op_1647"), val = tensor([0, 2, 1])]; tensor var_1648 = transpose(perm = var_1647, x = var_1646)[name = string("transpose_7")]; tensor hidden_states_57_cast_fp16 = add(x = hidden_states_53_cast_fp16, y = var_1648)[name = string("hidden_states_57_cast_fp16")]; tensor mean_29_axes_0 = const()[name = string("mean_29_axes_0"), val = tensor([-1])]; bool mean_29_keep_dims_0 = const()[name = string("mean_29_keep_dims_0"), val = bool(true)]; tensor mean_29_cast_fp16 = reduce_mean(axes = mean_29_axes_0, keep_dims = mean_29_keep_dims_0, x = hidden_states_57_cast_fp16)[name = string("mean_29_cast_fp16")]; tensor input_99_cast_fp16 = sub(x = hidden_states_57_cast_fp16, y = mean_29_cast_fp16)[name = string("input_99_cast_fp16")]; tensor var_1656_axes_0 = const()[name = string("op_1656_axes_0"), val = tensor([-1])]; tensor model_model_layers_7_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_7_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275542016)))]; tensor var_1656_cast_fp16 = layer_norm(axes = var_1656_axes_0, epsilon = var_46_to_fp16, gamma = model_model_layers_7_input_layernorm_weight_to_fp16, x = input_99_cast_fp16)[name = string("op_1656_cast_fp16")]; tensor var_1660 = const()[name = string("op_1660"), val = tensor([0, 2, 1])]; tensor var_1662_axes_0 = const()[name = string("op_1662_axes_0"), val = tensor([2])]; tensor var_1661 = transpose(perm = var_1660, x = var_1656_cast_fp16)[name = string("transpose_6")]; tensor var_1662 = expand_dims(axes = var_1662_axes_0, x = var_1661)[name = string("op_1662")]; string query_states_29_pad_type_0 = const()[name = string("query_states_29_pad_type_0"), val = string("valid")]; tensor query_states_29_strides_0 = const()[name = string("query_states_29_strides_0"), val = tensor([1, 1])]; tensor query_states_29_pad_0 = const()[name = string("query_states_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_states_29_dilations_0 = const()[name = string("query_states_29_dilations_0"), val = tensor([1, 1])]; int32 query_states_29_groups_0 = const()[name = string("query_states_29_groups_0"), val = int32(1)]; tensor query_states_29 = conv(dilations = query_states_29_dilations_0, groups = query_states_29_groups_0, pad = query_states_29_pad_0, pad_type = query_states_29_pad_type_0, strides = query_states_29_strides_0, weight = model_model_layers_7_self_attn_q_proj_weight_palettized, x = var_1662)[name = string("query_states_29")]; string key_states_43_pad_type_0 = const()[name = string("key_states_43_pad_type_0"), val = string("valid")]; tensor key_states_43_strides_0 = const()[name = string("key_states_43_strides_0"), val = tensor([1, 1])]; tensor key_states_43_pad_0 = const()[name = string("key_states_43_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_states_43_dilations_0 = const()[name = string("key_states_43_dilations_0"), val = tensor([1, 1])]; int32 key_states_43_groups_0 = const()[name = string("key_states_43_groups_0"), val = int32(1)]; tensor key_states_43 = conv(dilations = key_states_43_dilations_0, groups = key_states_43_groups_0, pad = key_states_43_pad_0, pad_type = key_states_43_pad_type_0, strides = key_states_43_strides_0, weight = model_model_layers_7_self_attn_k_proj_weight_palettized, x = var_1662)[name = string("key_states_43")]; string value_states_43_pad_type_0 = const()[name = string("value_states_43_pad_type_0"), val = string("valid")]; tensor value_states_43_strides_0 = const()[name = string("value_states_43_strides_0"), val = tensor([1, 1])]; tensor value_states_43_pad_0 = const()[name = string("value_states_43_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_states_43_dilations_0 = const()[name = string("value_states_43_dilations_0"), val = tensor([1, 1])]; int32 value_states_43_groups_0 = const()[name = string("value_states_43_groups_0"), val = int32(1)]; tensor value_states_43 = conv(dilations = value_states_43_dilations_0, groups = value_states_43_groups_0, pad = value_states_43_pad_0, pad_type = value_states_43_pad_type_0, strides = value_states_43_strides_0, weight = model_model_layers_7_self_attn_v_proj_weight_palettized, x = var_1662)[name = string("value_states_43")]; tensor var_1682 = const()[name = string("op_1682"), val = tensor([1, 32, 64, 64])]; tensor var_1683 = reshape(shape = var_1682, x = query_states_29)[name = string("op_1683")]; tensor var_1684 = const()[name = string("op_1684"), val = tensor([0, 1, 3, 2])]; tensor var_1686 = const()[name = string("op_1686"), val = tensor([1, 8, 64, 64])]; tensor var_1687 = reshape(shape = var_1686, x = key_states_43)[name = string("op_1687")]; tensor var_1688 = const()[name = string("op_1688"), val = tensor([0, 1, 3, 2])]; tensor var_1690 = const()[name = string("op_1690"), val = tensor([1, 8, 64, 64])]; tensor var_1691 = reshape(shape = var_1690, x = value_states_43)[name = string("op_1691")]; tensor var_1692 = const()[name = string("op_1692"), val = tensor([0, 1, 3, 2])]; tensor x1_29_begin_0 = const()[name = string("x1_29_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_29_end_0 = const()[name = string("x1_29_end_0"), val = tensor([1, 32, 64, 32])]; tensor x1_29_end_mask_0 = const()[name = string("x1_29_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_197 = transpose(perm = var_1684, x = var_1683)[name = string("transpose_5")]; tensor x1_29 = slice_by_index(begin = x1_29_begin_0, end = x1_29_end_0, end_mask = x1_29_end_mask_0, x = x_197)[name = string("x1_29")]; tensor x2_29_begin_0 = const()[name = string("x2_29_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_29_end_0 = const()[name = string("x2_29_end_0"), val = tensor([1, 32, 64, 64])]; tensor x2_29_end_mask_0 = const()[name = string("x2_29_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_29 = slice_by_index(begin = x2_29_begin_0, end = x2_29_end_0, end_mask = x2_29_end_mask_0, x = x_197)[name = string("x2_29")]; tensor var_1710 = mul(x = x1_29, y = cos_7)[name = string("op_1710")]; tensor var_1711 = mul(x = x2_29, y = sin_7)[name = string("op_1711")]; tensor var_1712 = sub(x = var_1710, y = var_1711)[name = string("op_1712")]; tensor var_1713 = mul(x = x2_29, y = cos_7)[name = string("op_1713")]; tensor var_1714 = mul(x = x1_29, y = sin_7)[name = string("op_1714")]; tensor var_1715 = add(x = var_1713, y = var_1714)[name = string("op_1715")]; bool rotated_29_interleave_0 = const()[name = string("rotated_29_interleave_0"), val = bool(false)]; tensor rotated_29 = concat(axis = var_44, interleave = rotated_29_interleave_0, values = (var_1712, var_1715))[name = string("rotated_29")]; tensor x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_end_0 = const()[name = string("x1_end_0"), val = tensor([1, 8, 64, 32])]; tensor x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x_201 = transpose(perm = var_1688, x = var_1687)[name = string("transpose_4")]; tensor x1 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = x_201)[name = string("x1")]; tensor x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_end_0 = const()[name = string("x2_end_0"), val = tensor([1, 8, 64, 64])]; tensor x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = x_201)[name = string("x2")]; tensor var_1731 = mul(x = x1, y = cos_7)[name = string("op_1731")]; tensor var_1732 = mul(x = x2, y = sin_7)[name = string("op_1732")]; tensor var_1733 = sub(x = var_1731, y = var_1732)[name = string("op_1733")]; tensor var_1734 = mul(x = x2, y = cos_7)[name = string("op_1734")]; tensor var_1735 = mul(x = x1, y = sin_7)[name = string("op_1735")]; tensor var_1736 = add(x = var_1734, y = var_1735)[name = string("op_1736")]; bool rotated_interleave_0 = const()[name = string("rotated_interleave_0"), val = bool(false)]; tensor rotated = concat(axis = var_44, interleave = rotated_interleave_0, values = (var_1733, var_1736))[name = string("rotated")]; tensor expand_dims_84 = const()[name = string("expand_dims_84"), val = tensor([7])]; tensor expand_dims_85 = const()[name = string("expand_dims_85"), val = tensor([0])]; tensor expand_dims_87 = const()[name = string("expand_dims_87"), val = tensor([0])]; tensor expand_dims_88 = const()[name = string("expand_dims_88"), val = tensor([8])]; int32 concat_128_axis_0 = const()[name = string("concat_128_axis_0"), val = int32(0)]; bool concat_128_interleave_0 = const()[name = string("concat_128_interleave_0"), val = bool(false)]; tensor concat_128 = concat(axis = concat_128_axis_0, interleave = concat_128_interleave_0, values = (expand_dims_84, expand_dims_85, current_pos, expand_dims_87))[name = string("concat_128")]; tensor concat_129_values1_0 = const()[name = string("concat_129_values1_0"), val = tensor([0])]; tensor concat_129_values3_0 = const()[name = string("concat_129_values3_0"), val = tensor([0])]; int32 concat_129_axis_0 = const()[name = string("concat_129_axis_0"), val = int32(0)]; bool concat_129_interleave_0 = const()[name = string("concat_129_interleave_0"), val = bool(false)]; tensor concat_129 = concat(axis = concat_129_axis_0, interleave = concat_129_interleave_0, values = (expand_dims_88, concat_129_values1_0, var_366, concat_129_values3_0))[name = string("concat_129")]; tensor model_model_kv_cache_0_internal_tensor_assign_15_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_128, begin_mask = model_model_kv_cache_0_internal_tensor_assign_15_begin_mask_0, end = concat_129, end_mask = model_model_kv_cache_0_internal_tensor_assign_15_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_15_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_15_stride_0, update = rotated, x = coreml_update_state_29)[name = string("model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_15_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_30_write_state")]; tensor coreml_update_state_30 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_30")]; tensor expand_dims_90 = const()[name = string("expand_dims_90"), val = tensor([23])]; tensor expand_dims_91 = const()[name = string("expand_dims_91"), val = tensor([0])]; tensor expand_dims_93 = const()[name = string("expand_dims_93"), val = tensor([0])]; tensor expand_dims_94 = const()[name = string("expand_dims_94"), val = tensor([24])]; int32 concat_132_axis_0 = const()[name = string("concat_132_axis_0"), val = int32(0)]; bool concat_132_interleave_0 = const()[name = string("concat_132_interleave_0"), val = bool(false)]; tensor concat_132 = concat(axis = concat_132_axis_0, interleave = concat_132_interleave_0, values = (expand_dims_90, expand_dims_91, current_pos, expand_dims_93))[name = string("concat_132")]; tensor concat_133_values1_0 = const()[name = string("concat_133_values1_0"), val = tensor([0])]; tensor concat_133_values3_0 = const()[name = string("concat_133_values3_0"), val = tensor([0])]; int32 concat_133_axis_0 = const()[name = string("concat_133_axis_0"), val = int32(0)]; bool concat_133_interleave_0 = const()[name = string("concat_133_interleave_0"), val = bool(false)]; tensor concat_133 = concat(axis = concat_133_axis_0, interleave = concat_133_interleave_0, values = (expand_dims_94, concat_133_values1_0, var_366, concat_133_values3_0))[name = string("concat_133")]; tensor model_model_kv_cache_0_internal_tensor_assign_16_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; tensor model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false])]; tensor model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; tensor model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor value_states_45 = transpose(perm = var_1692, x = var_1691)[name = string("transpose_3")]; tensor model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_132, begin_mask = model_model_kv_cache_0_internal_tensor_assign_16_begin_mask_0, end = concat_133, end_mask = model_model_kv_cache_0_internal_tensor_assign_16_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_16_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_16_stride_0, update = value_states_45, x = coreml_update_state_30)[name = string("model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16")]; write_state(data = model_model_kv_cache_0_internal_tensor_assign_16_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_31_write_state")]; tensor coreml_update_state_31 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_31")]; tensor var_1759_begin_0 = const()[name = string("op_1759_begin_0"), val = tensor([7, 0, 0, 0])]; tensor var_1759_end_0 = const()[name = string("op_1759_end_0"), val = tensor([8, 8, 2048, 64])]; tensor var_1759_end_mask_0 = const()[name = string("op_1759_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1759_cast_fp16 = slice_by_index(begin = var_1759_begin_0, end = var_1759_end_0, end_mask = var_1759_end_mask_0, x = coreml_update_state_31)[name = string("op_1759_cast_fp16")]; tensor K_layer_cache_axes_0 = const()[name = string("K_layer_cache_axes_0"), val = tensor([0])]; tensor K_layer_cache_cast_fp16 = squeeze(axes = K_layer_cache_axes_0, x = var_1759_cast_fp16)[name = string("K_layer_cache_cast_fp16")]; tensor var_1761_begin_0 = const()[name = string("op_1761_begin_0"), val = tensor([23, 0, 0, 0])]; tensor var_1761_end_0 = const()[name = string("op_1761_end_0"), val = tensor([24, 8, 2048, 64])]; tensor var_1761_end_mask_0 = const()[name = string("op_1761_end_mask_0"), val = tensor([false, true, true, true])]; tensor var_1761_cast_fp16 = slice_by_index(begin = var_1761_begin_0, end = var_1761_end_0, end_mask = var_1761_end_mask_0, x = coreml_update_state_31)[name = string("op_1761_cast_fp16")]; tensor V_layer_cache_axes_0 = const()[name = string("V_layer_cache_axes_0"), val = tensor([0])]; tensor V_layer_cache_cast_fp16 = squeeze(axes = V_layer_cache_axes_0, x = var_1761_cast_fp16)[name = string("V_layer_cache_cast_fp16")]; tensor x_207_axes_0 = const()[name = string("x_207_axes_0"), val = tensor([1])]; tensor x_207_cast_fp16 = expand_dims(axes = x_207_axes_0, x = K_layer_cache_cast_fp16)[name = string("x_207_cast_fp16")]; tensor var_1770 = const()[name = string("op_1770"), val = tensor([1, 4, 1, 1])]; tensor x_209_cast_fp16 = tile(reps = var_1770, x = x_207_cast_fp16)[name = string("x_209_cast_fp16")]; tensor var_1774 = const()[name = string("op_1774"), val = tensor([1, -1, 2048, 64])]; tensor var_1775_cast_fp16 = reshape(shape = var_1774, x = x_209_cast_fp16)[name = string("op_1775_cast_fp16")]; tensor x_213_axes_0 = const()[name = string("x_213_axes_0"), val = tensor([1])]; tensor x_213_cast_fp16 = expand_dims(axes = x_213_axes_0, x = V_layer_cache_cast_fp16)[name = string("x_213_cast_fp16")]; tensor var_1777 = const()[name = string("op_1777"), val = tensor([1, 4, 1, 1])]; tensor x_215_cast_fp16 = tile(reps = var_1777, x = x_213_cast_fp16)[name = string("x_215_cast_fp16")]; bool var_1784_transpose_x_0 = const()[name = string("op_1784_transpose_x_0"), val = bool(false)]; bool var_1784_transpose_y_0 = const()[name = string("op_1784_transpose_y_0"), val = bool(true)]; tensor var_1784_cast_fp16 = matmul(transpose_x = var_1784_transpose_x_0, transpose_y = var_1784_transpose_y_0, x = rotated_29, y = var_1775_cast_fp16)[name = string("op_1784_cast_fp16")]; fp16 var_1785_to_fp16 = const()[name = string("op_1785_to_fp16"), val = fp16(0x1p-3)]; tensor attn_weights_cast_fp16 = mul(x = var_1784_cast_fp16, y = var_1785_to_fp16)[name = string("attn_weights_cast_fp16")]; tensor x_217_cast_fp16 = add(x = attn_weights_cast_fp16, y = causal_mask)[name = string("x_217_cast_fp16")]; tensor reduce_max_7_axes_0 = const()[name = string("reduce_max_7_axes_0"), val = tensor([-1])]; bool reduce_max_7_keep_dims_0 = const()[name = string("reduce_max_7_keep_dims_0"), val = bool(true)]; tensor reduce_max_7_cast_fp16 = reduce_max(axes = reduce_max_7_axes_0, keep_dims = reduce_max_7_keep_dims_0, x = x_217_cast_fp16)[name = string("reduce_max_7_cast_fp16")]; tensor x_219_cast_fp16 = sub(x = x_217_cast_fp16, y = reduce_max_7_cast_fp16)[name = string("x_219_cast_fp16")]; tensor exp_x_cast_fp16 = exp(x = x_219_cast_fp16)[name = string("exp_x_cast_fp16")]; tensor var_1796_axes_0 = const()[name = string("op_1796_axes_0"), val = tensor([-1])]; bool var_1796_keep_dims_0 = const()[name = string("op_1796_keep_dims_0"), val = bool(true)]; tensor var_1796_cast_fp16 = reduce_sum(axes = var_1796_axes_0, keep_dims = var_1796_keep_dims_0, x = exp_x_cast_fp16)[name = string("op_1796_cast_fp16")]; tensor var_1797_cast_fp16 = real_div(x = exp_x_cast_fp16, y = var_1796_cast_fp16)[name = string("op_1797_cast_fp16")]; tensor concat_138 = const()[name = string("concat_138"), val = tensor([32, 64, 2048])]; tensor reshape_21_cast_fp16 = reshape(shape = concat_138, x = var_1797_cast_fp16)[name = string("reshape_21_cast_fp16")]; tensor concat_139 = const()[name = string("concat_139"), val = tensor([32, 2048, 64])]; tensor reshape_22_cast_fp16 = reshape(shape = concat_139, x = x_215_cast_fp16)[name = string("reshape_22_cast_fp16")]; bool matmul_7_transpose_x_0 = const()[name = string("matmul_7_transpose_x_0"), val = bool(false)]; bool matmul_7_transpose_y_0 = const()[name = string("matmul_7_transpose_y_0"), val = bool(false)]; tensor matmul_7_cast_fp16 = matmul(transpose_x = matmul_7_transpose_x_0, transpose_y = matmul_7_transpose_y_0, x = reshape_21_cast_fp16, y = reshape_22_cast_fp16)[name = string("matmul_7_cast_fp16")]; tensor concat_143 = const()[name = string("concat_143"), val = tensor([1, 32, 64, 64])]; tensor reshape_23_cast_fp16 = reshape(shape = concat_143, x = matmul_7_cast_fp16)[name = string("reshape_23_cast_fp16")]; tensor var_1800_perm_0 = const()[name = string("op_1800_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1802 = const()[name = string("op_1802"), val = tensor([1, 64, 2048])]; tensor var_1800_cast_fp16 = transpose(perm = var_1800_perm_0, x = reshape_23_cast_fp16)[name = string("transpose_2")]; tensor input_103_cast_fp16 = reshape(shape = var_1802, x = var_1800_cast_fp16)[name = string("input_103_cast_fp16")]; tensor model_model_layers_7_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275546176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(277643392))))[name = string("model_model_layers_7_self_attn_o_proj_weight_promoted_to_fp16_palettized")]; tensor linear_7_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_7_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_103_cast_fp16)[name = string("linear_7_cast_fp16")]; tensor hidden_states_61_cast_fp16 = add(x = hidden_states_57_cast_fp16, y = linear_7_cast_fp16)[name = string("hidden_states_61_cast_fp16")]; tensor mean_axes_0 = const()[name = string("mean_axes_0"), val = tensor([-1])]; bool mean_keep_dims_0 = const()[name = string("mean_keep_dims_0"), val = bool(true)]; tensor mean_cast_fp16 = reduce_mean(axes = mean_axes_0, keep_dims = mean_keep_dims_0, x = hidden_states_61_cast_fp16)[name = string("mean_cast_fp16")]; tensor input_105_cast_fp16 = sub(x = hidden_states_61_cast_fp16, y = mean_cast_fp16)[name = string("input_105_cast_fp16")]; tensor var_1813_axes_0 = const()[name = string("op_1813_axes_0"), val = tensor([-1])]; tensor model_model_layers_7_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_7_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(277651648)))]; tensor var_1813_cast_fp16 = layer_norm(axes = var_1813_axes_0, epsilon = var_46_to_fp16, gamma = model_model_layers_7_post_attention_layernorm_weight_to_fp16, x = input_105_cast_fp16)[name = string("op_1813_cast_fp16")]; tensor var_1820 = const()[name = string("op_1820"), val = tensor([0, 2, 1])]; tensor input_107_axes_0 = const()[name = string("input_107_axes_0"), val = tensor([2])]; tensor var_1821 = transpose(perm = var_1820, x = var_1813_cast_fp16)[name = string("transpose_1")]; tensor input_107 = expand_dims(axes = input_107_axes_0, x = var_1821)[name = string("input_107")]; string input_109_pad_type_0 = const()[name = string("input_109_pad_type_0"), val = string("valid")]; tensor input_109_strides_0 = const()[name = string("input_109_strides_0"), val = tensor([1, 1])]; tensor input_109_pad_0 = const()[name = string("input_109_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_109_dilations_0 = const()[name = string("input_109_dilations_0"), val = tensor([1, 1])]; int32 input_109_groups_0 = const()[name = string("input_109_groups_0"), val = int32(1)]; tensor input_109 = conv(dilations = input_109_dilations_0, groups = input_109_groups_0, pad = input_109_pad_0, pad_type = input_109_pad_type_0, strides = input_109_strides_0, weight = model_model_layers_7_mlp_gate_proj_weight_palettized, x = input_107)[name = string("input_109")]; string up_states_pad_type_0 = const()[name = string("up_states_pad_type_0"), val = string("valid")]; tensor up_states_strides_0 = const()[name = string("up_states_strides_0"), val = tensor([1, 1])]; tensor up_states_pad_0 = const()[name = string("up_states_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_states_dilations_0 = const()[name = string("up_states_dilations_0"), val = tensor([1, 1])]; int32 up_states_groups_0 = const()[name = string("up_states_groups_0"), val = int32(1)]; tensor up_states = conv(dilations = up_states_dilations_0, groups = up_states_groups_0, pad = up_states_pad_0, pad_type = up_states_pad_type_0, strides = up_states_strides_0, weight = model_model_layers_7_mlp_up_proj_weight_palettized, x = input_107)[name = string("up_states")]; tensor gate_states = silu(x = input_109)[name = string("gate_states")]; tensor input = mul(x = gate_states, y = up_states)[name = string("input")]; string hidden_states_pad_type_0 = const()[name = string("hidden_states_pad_type_0"), val = string("valid")]; tensor hidden_states_strides_0 = const()[name = string("hidden_states_strides_0"), val = tensor([1, 1])]; tensor hidden_states_pad_0 = const()[name = string("hidden_states_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_dilations_0 = const()[name = string("hidden_states_dilations_0"), val = tensor([1, 1])]; int32 hidden_states_groups_0 = const()[name = string("hidden_states_groups_0"), val = int32(1)]; tensor hidden_states_1 = conv(dilations = hidden_states_dilations_0, groups = hidden_states_groups_0, pad = hidden_states_pad_0, pad_type = hidden_states_pad_type_0, strides = hidden_states_strides_0, weight = model_model_layers_7_mlp_down_proj_weight_palettized, x = input)[name = string("hidden_states")]; tensor var_1843_axes_0 = const()[name = string("op_1843_axes_0"), val = tensor([2])]; tensor var_1843 = squeeze(axes = var_1843_axes_0, x = hidden_states_1)[name = string("op_1843")]; tensor var_1844 = const()[name = string("op_1844"), val = tensor([0, 2, 1])]; tensor var_1845 = transpose(perm = var_1844, x = var_1843)[name = string("transpose_0")]; tensor output_hidden_states = add(x = hidden_states_61_cast_fp16, y = var_1845)[name = string("op_1846_cast_fp16")]; } -> (output_hidden_states); }