program(1.3) [buildInfo = dict({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}, {"coremltools-component-torch", "2.4.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0"}})] { func main(state> k_cache1, state> k_cache2, tensor offset_mask, tensor token_data, state> v_cache1, state> v_cache2) [FlexibleShapeInformation = tuple>>, tuple, ?>>>>((("DefaultShapes", {{"offset_mask", [1, 1]}, {"token_data", [1, 1]}}), ("RangeDims", {{"offset_mask", [[1, 1], [1, 448]]}, {"token_data", [[1, 1], [1, 448]]}})))] { tensor var_26_shape_cast_fp16 = shape(x = offset_mask)[name = string("op_26_shape_cast_fp16")]; int32 gather_0_axis_0 = const()[name = string("gather_0_axis_0"), val = int32(0)]; int32 gather_0_batch_dims_0 = const()[name = string("gather_0_batch_dims_0"), val = int32(0)]; bool gather_0_validate_indices_0 = const()[name = string("gather_0_validate_indices_0"), val = bool(false)]; string var_26_shape_cast_fp16_to_int16_dtype_0 = const()[name = string("op_26_shape_cast_fp16_to_int16_dtype_0"), val = string("int16")]; uint16 select_0_to_uint16 = const()[name = string("select_0_to_uint16"), val = uint16(1)]; tensor var_26_shape_cast_fp16_to_int16 = cast(dtype = var_26_shape_cast_fp16_to_int16_dtype_0, x = var_26_shape_cast_fp16)[name = string("cast_82")]; int16 gather_0_cast_uint16 = gather(axis = gather_0_axis_0, batch_dims = gather_0_batch_dims_0, indices = select_0_to_uint16, validate_indices = gather_0_validate_indices_0, x = var_26_shape_cast_fp16_to_int16)[name = string("gather_0_cast_uint16")]; string gather_0_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_0_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor var_30_shape = shape(x = token_data)[name = string("op_30_shape")]; int32 gather_1_axis_0 = const()[name = string("gather_1_axis_0"), val = int32(0)]; int32 gather_1_batch_dims_0 = const()[name = string("gather_1_batch_dims_0"), val = int32(0)]; bool gather_1_validate_indices_0 = const()[name = string("gather_1_validate_indices_0"), val = bool(false)]; string var_30_shape_to_uint16_dtype_0 = const()[name = string("op_30_shape_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_1_to_uint16 = const()[name = string("select_1_to_uint16"), val = uint16(1)]; tensor var_30_shape_to_uint16 = cast(dtype = var_30_shape_to_uint16_dtype_0, x = var_30_shape)[name = string("cast_80")]; uint16 gather_1_cast_uint16 = gather(axis = gather_1_axis_0, batch_dims = gather_1_batch_dims_0, indices = select_1_to_uint16, validate_indices = gather_1_validate_indices_0, x = var_30_shape_to_uint16)[name = string("gather_1_cast_uint16")]; string gather_1_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_1_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_1_cast_uint16_to_int32 = cast(dtype = gather_1_cast_uint16_to_int32_dtype_0, x = gather_1_cast_uint16)[name = string("cast_79")]; int32 gather_0_cast_uint16_to_int32 = cast(dtype = gather_0_cast_uint16_to_int32_dtype_0, x = gather_0_cast_uint16)[name = string("cast_81")]; int32 offset = sub(x = gather_0_cast_uint16_to_int32, y = gather_1_cast_uint16_to_int32)[name = string("offset")]; int32 var_50_axis_0 = const()[name = string("op_50_axis_0"), val = int32(0)]; int32 var_50_batch_dims_0 = const()[name = string("op_50_batch_dims_0"), val = int32(0)]; bool var_50_validate_indices_0 = const()[name = string("op_50_validate_indices_0"), val = bool(false)]; tensor token_embedding_weight_to_fp16 = const()[name = string("token_embedding_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; tensor var_50_cast_fp16 = gather(axis = var_50_axis_0, batch_dims = var_50_batch_dims_0, indices = token_data, validate_indices = var_50_validate_indices_0, x = token_embedding_weight_to_fp16)[name = string("op_50_cast_fp16")]; int32 concat_0_values1_0 = const()[name = string("concat_0_values1_0"), val = int32(0)]; int32 concat_0_axis_0 = const()[name = string("concat_0_axis_0"), val = int32(0)]; bool concat_0_interleave_0 = const()[name = string("concat_0_interleave_0"), val = bool(false)]; tensor concat_0 = concat(axis = concat_0_axis_0, interleave = concat_0_interleave_0, values = (offset, concat_0_values1_0))[name = string("concat_0")]; int32 concat_1_values1_0 = const()[name = string("concat_1_values1_0"), val = int32(512)]; int32 concat_1_axis_0 = const()[name = string("concat_1_axis_0"), val = int32(0)]; bool concat_1_interleave_0 = const()[name = string("concat_1_interleave_0"), val = bool(false)]; tensor concat_1 = concat(axis = concat_1_axis_0, interleave = concat_1_interleave_0, values = (gather_0_cast_uint16_to_int32, concat_1_values1_0))[name = string("concat_1")]; tensor var_53_end_mask_0 = const()[name = string("op_53_end_mask_0"), val = tensor([false, true])]; tensor positional_embedding_to_fp16 = const()[name = string("positional_embedding_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53109888)))]; tensor var_53_cast_fp16 = slice_by_index(begin = concat_0, end = concat_1, end_mask = var_53_end_mask_0, x = positional_embedding_to_fp16)[name = string("op_53_cast_fp16")]; tensor x_3_cast_fp16 = add(x = var_50_cast_fp16, y = var_53_cast_fp16)[name = string("x_3_cast_fp16")]; tensor read_state_0 = read_state(input = k_cache1)[name = string("read_state_0")]; tensor k_cache_1_begin_0 = const()[name = string("k_cache_1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor k_cache_1_end_0 = const()[name = string("k_cache_1_end_0"), val = tensor([1, 1, 448, 512])]; tensor k_cache_1_end_mask_0 = const()[name = string("k_cache_1_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_1_squeeze_mask_0 = const()[name = string("k_cache_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_1_cast_fp16 = slice_by_index(begin = k_cache_1_begin_0, end = k_cache_1_end_0, end_mask = k_cache_1_end_mask_0, squeeze_mask = k_cache_1_squeeze_mask_0, x = read_state_0)[name = string("k_cache_1_cast_fp16")]; tensor read_state_1 = read_state(input = v_cache1)[name = string("read_state_1")]; tensor v_cache_1_begin_0 = const()[name = string("v_cache_1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor v_cache_1_end_0 = const()[name = string("v_cache_1_end_0"), val = tensor([1, 1, 448, 512])]; tensor v_cache_1_end_mask_0 = const()[name = string("v_cache_1_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_1_squeeze_mask_0 = const()[name = string("v_cache_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_1_cast_fp16 = slice_by_index(begin = v_cache_1_begin_0, end = v_cache_1_end_0, end_mask = v_cache_1_end_mask_0, squeeze_mask = v_cache_1_squeeze_mask_0, x = read_state_1)[name = string("v_cache_1_cast_fp16")]; tensor read_state_2 = read_state(input = k_cache2)[name = string("read_state_2")]; tensor k_cache_3_begin_0 = const()[name = string("k_cache_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor k_cache_3_end_0 = const()[name = string("k_cache_3_end_0"), val = tensor([1, 1, 1500, 512])]; tensor k_cache_3_end_mask_0 = const()[name = string("k_cache_3_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_3_squeeze_mask_0 = const()[name = string("k_cache_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_3_cast_fp16 = slice_by_index(begin = k_cache_3_begin_0, end = k_cache_3_end_0, end_mask = k_cache_3_end_mask_0, squeeze_mask = k_cache_3_squeeze_mask_0, x = read_state_2)[name = string("k_cache_3_cast_fp16")]; tensor read_state_3 = read_state(input = v_cache2)[name = string("read_state_3")]; tensor v_cache_3_begin_0 = const()[name = string("v_cache_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor v_cache_3_end_0 = const()[name = string("v_cache_3_end_0"), val = tensor([1, 1, 1500, 512])]; tensor v_cache_3_end_mask_0 = const()[name = string("v_cache_3_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_3_squeeze_mask_0 = const()[name = string("v_cache_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_3_cast_fp16 = slice_by_index(begin = v_cache_3_begin_0, end = v_cache_3_end_0, end_mask = v_cache_3_end_mask_0, squeeze_mask = v_cache_3_squeeze_mask_0, x = read_state_3)[name = string("v_cache_3_cast_fp16")]; int32 var_76 = const()[name = string("op_76"), val = int32(-1)]; tensor var_94_axes_0 = const()[name = string("op_94_axes_0"), val = tensor([-1])]; tensor blocks_0_attn_ln_weight_to_fp16 = const()[name = string("blocks_0_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53568704)))]; tensor blocks_0_attn_ln_bias_to_fp16 = const()[name = string("blocks_0_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53569792)))]; fp16 var_82_to_fp16 = const()[name = string("op_82_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_94_cast_fp16 = layer_norm(axes = var_94_axes_0, beta = blocks_0_attn_ln_bias_to_fp16, epsilon = var_82_to_fp16, gamma = blocks_0_attn_ln_weight_to_fp16, x = x_3_cast_fp16)[name = string("op_94_cast_fp16")]; tensor var_105_to_fp16 = const()[name = string("op_105_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53570880)))]; tensor var_106_to_fp16 = const()[name = string("op_106_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54095232)))]; tensor linear_0_cast_fp16 = linear(bias = var_106_to_fp16, weight = var_105_to_fp16, x = var_94_cast_fp16)[name = string("linear_0_cast_fp16")]; tensor var_109_to_fp16 = const()[name = string("op_109_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54096320)))]; tensor linear_1_bias_0_to_fp16 = const()[name = string("linear_1_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54620672)))]; tensor linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_109_to_fp16, x = var_94_cast_fp16)[name = string("linear_1_cast_fp16")]; tensor var_113_to_fp16 = const()[name = string("op_113_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54621760)))]; tensor var_114_to_fp16 = const()[name = string("op_114_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55146112)))]; tensor linear_2_cast_fp16 = linear(bias = var_114_to_fp16, weight = var_113_to_fp16, x = var_94_cast_fp16)[name = string("linear_2_cast_fp16")]; tensor var_116_shape_cast_fp16 = shape(x = linear_0_cast_fp16)[name = string("op_116_shape_cast_fp16")]; int32 gather_2_axis_0 = const()[name = string("gather_2_axis_0"), val = int32(0)]; int32 gather_2_batch_dims_0 = const()[name = string("gather_2_batch_dims_0"), val = int32(0)]; bool gather_2_validate_indices_0 = const()[name = string("gather_2_validate_indices_0"), val = bool(false)]; string var_116_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_116_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_2_to_uint16 = const()[name = string("select_2_to_uint16"), val = uint16(1)]; tensor var_116_shape_cast_fp16_to_uint16 = cast(dtype = var_116_shape_cast_fp16_to_uint16_dtype_0, x = var_116_shape_cast_fp16)[name = string("cast_78")]; uint16 gather_2_cast_uint16 = gather(axis = gather_2_axis_0, batch_dims = gather_2_batch_dims_0, indices = select_2_to_uint16, validate_indices = gather_2_validate_indices_0, x = var_116_shape_cast_fp16_to_uint16)[name = string("gather_2_cast_uint16")]; string gather_2_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_2_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_2_cast_uint16_to_int32 = cast(dtype = gather_2_cast_uint16_to_int32_dtype_0, x = gather_2_cast_uint16)[name = string("cast_77")]; int32 end_step_3 = add(x = offset, y = gather_2_cast_uint16_to_int32)[name = string("end_step_3")]; tensor expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor([0])]; tensor expand_dims_1_axes_0 = const()[name = string("expand_dims_1_axes_0"), val = tensor([0])]; tensor expand_dims_1 = expand_dims(axes = expand_dims_1_axes_0, x = offset)[name = string("expand_dims_1")]; tensor expand_dims_2 = const()[name = string("expand_dims_2"), val = tensor([0])]; tensor expand_dims_3_axes_0 = const()[name = string("expand_dims_3_axes_0"), val = tensor([0])]; tensor expand_dims_3 = expand_dims(axes = expand_dims_3_axes_0, x = end_step_3)[name = string("expand_dims_3")]; tensor concat_4_values0_0 = const()[name = string("concat_4_values0_0"), val = tensor([0])]; int32 concat_4_axis_0 = const()[name = string("concat_4_axis_0"), val = int32(0)]; bool concat_4_interleave_0 = const()[name = string("concat_4_interleave_0"), val = bool(false)]; tensor concat_4 = concat(axis = concat_4_axis_0, interleave = concat_4_interleave_0, values = (concat_4_values0_0, expand_dims_0, expand_dims_1, expand_dims_2))[name = string("concat_4")]; tensor concat_5_values0_0 = const()[name = string("concat_5_values0_0"), val = tensor([0])]; tensor concat_5_values1_0 = const()[name = string("concat_5_values1_0"), val = tensor([0])]; tensor concat_5_values3_0 = const()[name = string("concat_5_values3_0"), val = tensor([0])]; int32 concat_5_axis_0 = const()[name = string("concat_5_axis_0"), val = int32(0)]; bool concat_5_interleave_0 = const()[name = string("concat_5_interleave_0"), val = bool(false)]; tensor concat_5 = concat(axis = concat_5_axis_0, interleave = concat_5_interleave_0, values = (concat_5_values0_0, concat_5_values1_0, expand_dims_3, concat_5_values3_0))[name = string("concat_5")]; tensor k_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_4, begin_mask = k_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_5, end_mask = k_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_1_stride_0, update = linear_1_cast_fp16, x = read_state_0)[name = string("k_cache1_internal_tensor_assign_1_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_1_cast_fp16, input = k_cache1)[name = string("coreml_update_state_12_write_state")]; tensor coreml_update_state_12 = read_state(input = k_cache1)[name = string("coreml_update_state_12")]; tensor v_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_4, begin_mask = v_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_5, end_mask = v_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_1_stride_0, update = linear_2_cast_fp16, x = read_state_1)[name = string("v_cache1_internal_tensor_assign_1_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_1_cast_fp16, input = v_cache1)[name = string("coreml_update_state_13_write_state")]; tensor coreml_update_state_13 = read_state(input = v_cache1)[name = string("coreml_update_state_13")]; int32 concat_10_values0_0 = const()[name = string("concat_10_values0_0"), val = int32(1)]; int32 concat_10_values2_0 = const()[name = string("concat_10_values2_0"), val = int32(512)]; int32 concat_10_axis_0 = const()[name = string("concat_10_axis_0"), val = int32(0)]; bool concat_10_interleave_0 = const()[name = string("concat_10_interleave_0"), val = bool(false)]; tensor concat_10 = concat(axis = concat_10_axis_0, interleave = concat_10_interleave_0, values = (concat_10_values0_0, end_step_3, concat_10_values2_0))[name = string("concat_10")]; tensor var_132_begin_0 = const()[name = string("op_132_begin_0"), val = tensor([0, 0, 0])]; tensor var_132_end_mask_0 = const()[name = string("op_132_end_mask_0"), val = tensor([true, false, true])]; tensor var_132_cast_fp16 = slice_by_index(begin = var_132_begin_0, end = concat_10, end_mask = var_132_end_mask_0, x = k_cache_1_cast_fp16)[name = string("op_132_cast_fp16")]; tensor var_135_begin_0 = const()[name = string("op_135_begin_0"), val = tensor([0, 0, 0])]; tensor var_135_end_mask_0 = const()[name = string("op_135_end_mask_0"), val = tensor([true, false, true])]; tensor var_135_cast_fp16 = slice_by_index(begin = var_135_begin_0, end = concat_10, end_mask = var_135_end_mask_0, x = v_cache_1_cast_fp16)[name = string("op_135_cast_fp16")]; tensor concat_12x = const()[name = string("concat_12x"), val = tensor([1, -1, 8, 64])]; tensor var_145_cast_fp16 = reshape(shape = concat_12x, x = linear_0_cast_fp16)[name = string("op_145_cast_fp16")]; tensor const_30_to_fp16 = const()[name = string("const_30_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_3_cast_fp16 = mul(x = var_145_cast_fp16, y = const_30_to_fp16)[name = string("q_3_cast_fp16")]; tensor concat_13x = const()[name = string("concat_13x"), val = tensor([1, -1, 8, 64])]; tensor var_152_cast_fp16 = reshape(shape = concat_13x, x = var_132_cast_fp16)[name = string("op_152_cast_fp16")]; tensor const_31_to_fp16 = const()[name = string("const_31_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_5_cast_fp16 = mul(x = var_152_cast_fp16, y = const_31_to_fp16)[name = string("k_5_cast_fp16")]; tensor concat_14x = const()[name = string("concat_14x"), val = tensor([1, -1, 8, 64])]; tensor var_159_cast_fp16 = reshape(shape = concat_14x, x = var_135_cast_fp16)[name = string("op_159_cast_fp16")]; tensor var_160 = const()[name = string("op_160"), val = tensor([0, 2, 1, 3])]; bool qk_1_transpose_x_0 = const()[name = string("qk_1_transpose_x_0"), val = bool(false)]; bool qk_1_transpose_y_0 = const()[name = string("qk_1_transpose_y_0"), val = bool(false)]; tensor transpose_49_perm_0 = const()[name = string("transpose_49_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_50_perm_0 = const()[name = string("transpose_50_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_50 = transpose(perm = transpose_50_perm_0, x = k_5_cast_fp16)[name = string("transpose_118")]; tensor transpose_49 = transpose(perm = transpose_49_perm_0, x = q_3_cast_fp16)[name = string("transpose_119")]; tensor qk_1_cast_fp16 = matmul(transpose_x = qk_1_transpose_x_0, transpose_y = qk_1_transpose_y_0, x = transpose_49, y = transpose_50)[name = string("qk_1_cast_fp16")]; int32 concat_15_values1_0 = const()[name = string("concat_15_values1_0"), val = int32(448)]; int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)]; bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)]; tensor concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (gather_2_cast_uint16_to_int32, concat_15_values1_0))[name = string("concat_15")]; tensor var_163_begin_0 = const()[name = string("op_163_begin_0"), val = tensor([0, 0])]; tensor var_163_end_mask_0 = const()[name = string("op_163_end_mask_0"), val = tensor([false, true])]; tensor mask_to_fp16 = const()[name = string("mask_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55147200)))]; tensor var_163_cast_fp16 = slice_by_index(begin = var_163_begin_0, end = concat_15, end_mask = var_163_end_mask_0, x = mask_to_fp16)[name = string("op_163_cast_fp16")]; int32 concat_16_values0_0 = const()[name = string("concat_16_values0_0"), val = int32(0)]; int32 concat_16_axis_0 = const()[name = string("concat_16_axis_0"), val = int32(0)]; bool concat_16_interleave_0 = const()[name = string("concat_16_interleave_0"), val = bool(false)]; tensor concat_16 = concat(axis = concat_16_axis_0, interleave = concat_16_interleave_0, values = (concat_16_values0_0, gather_2_cast_uint16_to_int32))[name = string("concat_16")]; tensor var_164_begin_0 = const()[name = string("op_164_begin_0"), val = tensor([0, 0])]; tensor var_164_end_mask_0 = const()[name = string("op_164_end_mask_0"), val = tensor([true, false])]; tensor var_164_cast_fp16 = slice_by_index(begin = var_164_begin_0, end = concat_16, end_mask = var_164_end_mask_0, x = var_163_cast_fp16)[name = string("op_164_cast_fp16")]; tensor qk_3_cast_fp16 = add(x = qk_1_cast_fp16, y = var_164_cast_fp16)[name = string("qk_3_cast_fp16")]; tensor var_167_cast_fp16 = softmax(axis = var_76, x = qk_3_cast_fp16)[name = string("op_167_cast_fp16")]; bool var_169_transpose_x_0 = const()[name = string("op_169_transpose_x_0"), val = bool(false)]; bool var_169_transpose_y_0 = const()[name = string("op_169_transpose_y_0"), val = bool(false)]; tensor v_5_cast_fp16 = transpose(perm = var_160, x = var_159_cast_fp16)[name = string("transpose_120")]; tensor var_169_cast_fp16 = matmul(transpose_x = var_169_transpose_x_0, transpose_y = var_169_transpose_y_0, x = var_167_cast_fp16, y = v_5_cast_fp16)[name = string("op_169_cast_fp16")]; tensor var_170 = const()[name = string("op_170"), val = tensor([0, 2, 1, 3])]; tensor concat_17x = const()[name = string("concat_17x"), val = tensor([1, -1, 512])]; tensor var_171_cast_fp16 = transpose(perm = var_170, x = var_169_cast_fp16)[name = string("transpose_117")]; tensor x_7_cast_fp16 = reshape(shape = concat_17x, x = var_171_cast_fp16)[name = string("x_7_cast_fp16")]; tensor var_175_to_fp16 = const()[name = string("op_175_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55548672)))]; tensor var_176_to_fp16 = const()[name = string("op_176_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56073024)))]; tensor linear_3_cast_fp16 = linear(bias = var_176_to_fp16, weight = var_175_to_fp16, x = x_7_cast_fp16)[name = string("linear_3_cast_fp16")]; tensor x_9_cast_fp16 = add(x = x_3_cast_fp16, y = linear_3_cast_fp16)[name = string("x_9_cast_fp16")]; tensor var_183_axes_0 = const()[name = string("op_183_axes_0"), val = tensor([-1])]; tensor blocks_0_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_0_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56074112)))]; tensor blocks_0_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_0_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56075200)))]; tensor var_183_cast_fp16 = layer_norm(axes = var_183_axes_0, beta = blocks_0_cross_attn_ln_bias_to_fp16, epsilon = var_82_to_fp16, gamma = blocks_0_cross_attn_ln_weight_to_fp16, x = x_9_cast_fp16)[name = string("op_183_cast_fp16")]; tensor var_192_to_fp16 = const()[name = string("op_192_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56076288)))]; tensor var_193_to_fp16 = const()[name = string("op_193_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56600640)))]; tensor linear_4_cast_fp16 = linear(bias = var_193_to_fp16, weight = var_192_to_fp16, x = var_183_cast_fp16)[name = string("linear_4_cast_fp16")]; tensor concat_18 = const()[name = string("concat_18"), val = tensor([0, 0, 0])]; tensor concat_19 = const()[name = string("concat_19"), val = tensor([0, 1500, 0])]; tensor k_7_internal_tensor_assign_1_stride_0 = const()[name = string("k_7_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_7_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_7_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_7_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_7_to_fp16 = const()[name = string("k_7_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56601728)))]; tensor k_7_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_18, begin_mask = k_7_internal_tensor_assign_1_begin_mask_0, end = concat_19, end_mask = k_7_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_7_internal_tensor_assign_1_squeeze_mask_0, stride = k_7_internal_tensor_assign_1_stride_0, update = k_cache_3_cast_fp16, x = k_7_to_fp16)[name = string("k_7_internal_tensor_assign_1_cast_fp16")]; tensor concat_20 = const()[name = string("concat_20"), val = tensor([0, 0, 0])]; tensor concat_21 = const()[name = string("concat_21"), val = tensor([0, 1500, 0])]; tensor v_7_internal_tensor_assign_1_stride_0 = const()[name = string("v_7_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_7_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_7_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_7_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_7_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_20, begin_mask = v_7_internal_tensor_assign_1_begin_mask_0, end = concat_21, end_mask = v_7_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_7_internal_tensor_assign_1_squeeze_mask_0, stride = v_7_internal_tensor_assign_1_stride_0, update = v_cache_3_cast_fp16, x = k_7_to_fp16)[name = string("v_7_internal_tensor_assign_1_cast_fp16")]; tensor concat_22x = const()[name = string("concat_22x"), val = tensor([1, -1, 8, 64])]; tensor var_213_cast_fp16 = reshape(shape = concat_22x, x = linear_4_cast_fp16)[name = string("op_213_cast_fp16")]; tensor const_32_to_fp16 = const()[name = string("const_32_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_7_cast_fp16 = mul(x = var_213_cast_fp16, y = const_32_to_fp16)[name = string("q_7_cast_fp16")]; tensor var_219 = const()[name = string("op_219"), val = tensor([1, 1500, 8, -1])]; tensor var_220_cast_fp16 = reshape(shape = var_219, x = k_7_internal_tensor_assign_1_cast_fp16)[name = string("op_220_cast_fp16")]; tensor const_33_to_fp16 = const()[name = string("const_33_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_9_cast_fp16 = mul(x = var_220_cast_fp16, y = const_33_to_fp16)[name = string("k_9_cast_fp16")]; tensor var_226 = const()[name = string("op_226"), val = tensor([1, 1500, 8, -1])]; tensor var_227_cast_fp16 = reshape(shape = var_226, x = v_7_internal_tensor_assign_1_cast_fp16)[name = string("op_227_cast_fp16")]; tensor var_228 = const()[name = string("op_228"), val = tensor([0, 2, 1, 3])]; bool qk_5_transpose_x_0 = const()[name = string("qk_5_transpose_x_0"), val = bool(false)]; bool qk_5_transpose_y_0 = const()[name = string("qk_5_transpose_y_0"), val = bool(false)]; tensor transpose_51_perm_0 = const()[name = string("transpose_51_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_52_perm_0 = const()[name = string("transpose_52_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_52 = transpose(perm = transpose_52_perm_0, x = k_9_cast_fp16)[name = string("transpose_114")]; tensor transpose_51 = transpose(perm = transpose_51_perm_0, x = q_7_cast_fp16)[name = string("transpose_115")]; tensor qk_5_cast_fp16 = matmul(transpose_x = qk_5_transpose_x_0, transpose_y = qk_5_transpose_y_0, x = transpose_51, y = transpose_52)[name = string("qk_5_cast_fp16")]; tensor var_232_cast_fp16 = softmax(axis = var_76, x = qk_5_cast_fp16)[name = string("op_232_cast_fp16")]; bool var_234_transpose_x_0 = const()[name = string("op_234_transpose_x_0"), val = bool(false)]; bool var_234_transpose_y_0 = const()[name = string("op_234_transpose_y_0"), val = bool(false)]; tensor v_9_cast_fp16 = transpose(perm = var_228, x = var_227_cast_fp16)[name = string("transpose_116")]; tensor var_234_cast_fp16 = matmul(transpose_x = var_234_transpose_x_0, transpose_y = var_234_transpose_y_0, x = var_232_cast_fp16, y = v_9_cast_fp16)[name = string("op_234_cast_fp16")]; tensor var_235 = const()[name = string("op_235"), val = tensor([0, 2, 1, 3])]; tensor concat_23x = const()[name = string("concat_23x"), val = tensor([1, -1, 512])]; tensor var_236_cast_fp16 = transpose(perm = var_235, x = var_234_cast_fp16)[name = string("transpose_113")]; tensor x_13_cast_fp16 = reshape(shape = concat_23x, x = var_236_cast_fp16)[name = string("x_13_cast_fp16")]; tensor var_240_to_fp16 = const()[name = string("op_240_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58137792)))]; tensor var_241_to_fp16 = const()[name = string("op_241_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58662144)))]; tensor linear_5_cast_fp16 = linear(bias = var_241_to_fp16, weight = var_240_to_fp16, x = x_13_cast_fp16)[name = string("linear_5_cast_fp16")]; tensor x_15_cast_fp16 = add(x = x_9_cast_fp16, y = linear_5_cast_fp16)[name = string("x_15_cast_fp16")]; tensor var_248_axes_0 = const()[name = string("op_248_axes_0"), val = tensor([-1])]; tensor blocks_0_mlp_ln_weight_to_fp16 = const()[name = string("blocks_0_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58663232)))]; tensor blocks_0_mlp_ln_bias_to_fp16 = const()[name = string("blocks_0_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58664320)))]; tensor var_248_cast_fp16 = layer_norm(axes = var_248_axes_0, beta = blocks_0_mlp_ln_bias_to_fp16, epsilon = var_82_to_fp16, gamma = blocks_0_mlp_ln_weight_to_fp16, x = x_15_cast_fp16)[name = string("op_248_cast_fp16")]; tensor var_257_to_fp16 = const()[name = string("op_257_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58665408)))]; tensor var_258_to_fp16 = const()[name = string("op_258_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60762624)))]; tensor linear_6_cast_fp16 = linear(bias = var_258_to_fp16, weight = var_257_to_fp16, x = var_248_cast_fp16)[name = string("linear_6_cast_fp16")]; string x_19_mode_0 = const()[name = string("x_19_mode_0"), val = string("EXACT")]; tensor x_19_cast_fp16 = gelu(mode = x_19_mode_0, x = linear_6_cast_fp16)[name = string("x_19_cast_fp16")]; tensor var_263_to_fp16 = const()[name = string("op_263_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60766784)))]; tensor var_264_to_fp16 = const()[name = string("op_264_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62864000)))]; tensor linear_7_cast_fp16 = linear(bias = var_264_to_fp16, weight = var_263_to_fp16, x = x_19_cast_fp16)[name = string("linear_7_cast_fp16")]; tensor x_21_cast_fp16 = add(x = x_15_cast_fp16, y = linear_7_cast_fp16)[name = string("x_21_cast_fp16")]; tensor k_cache_5_begin_0 = const()[name = string("k_cache_5_begin_0"), val = tensor([1, 0, 0, 0])]; tensor k_cache_5_end_0 = const()[name = string("k_cache_5_end_0"), val = tensor([2, 1, 448, 512])]; tensor k_cache_5_end_mask_0 = const()[name = string("k_cache_5_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_5_squeeze_mask_0 = const()[name = string("k_cache_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_5_cast_fp16 = slice_by_index(begin = k_cache_5_begin_0, end = k_cache_5_end_0, end_mask = k_cache_5_end_mask_0, squeeze_mask = k_cache_5_squeeze_mask_0, x = coreml_update_state_12)[name = string("k_cache_5_cast_fp16")]; tensor v_cache_5_begin_0 = const()[name = string("v_cache_5_begin_0"), val = tensor([1, 0, 0, 0])]; tensor v_cache_5_end_0 = const()[name = string("v_cache_5_end_0"), val = tensor([2, 1, 448, 512])]; tensor v_cache_5_end_mask_0 = const()[name = string("v_cache_5_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_5_squeeze_mask_0 = const()[name = string("v_cache_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_5_cast_fp16 = slice_by_index(begin = v_cache_5_begin_0, end = v_cache_5_end_0, end_mask = v_cache_5_end_mask_0, squeeze_mask = v_cache_5_squeeze_mask_0, x = coreml_update_state_13)[name = string("v_cache_5_cast_fp16")]; tensor k_cache_7_begin_0 = const()[name = string("k_cache_7_begin_0"), val = tensor([1, 0, 0, 0])]; tensor k_cache_7_end_0 = const()[name = string("k_cache_7_end_0"), val = tensor([2, 1, 1500, 512])]; tensor k_cache_7_end_mask_0 = const()[name = string("k_cache_7_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_7_squeeze_mask_0 = const()[name = string("k_cache_7_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_7_cast_fp16 = slice_by_index(begin = k_cache_7_begin_0, end = k_cache_7_end_0, end_mask = k_cache_7_end_mask_0, squeeze_mask = k_cache_7_squeeze_mask_0, x = read_state_2)[name = string("k_cache_7_cast_fp16")]; tensor v_cache_7_begin_0 = const()[name = string("v_cache_7_begin_0"), val = tensor([1, 0, 0, 0])]; tensor v_cache_7_end_0 = const()[name = string("v_cache_7_end_0"), val = tensor([2, 1, 1500, 512])]; tensor v_cache_7_end_mask_0 = const()[name = string("v_cache_7_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_7_squeeze_mask_0 = const()[name = string("v_cache_7_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_7_cast_fp16 = slice_by_index(begin = v_cache_7_begin_0, end = v_cache_7_end_0, end_mask = v_cache_7_end_mask_0, squeeze_mask = v_cache_7_squeeze_mask_0, x = read_state_3)[name = string("v_cache_7_cast_fp16")]; int32 var_287 = const()[name = string("op_287"), val = int32(-1)]; tensor var_305_axes_0 = const()[name = string("op_305_axes_0"), val = tensor([-1])]; tensor blocks_1_attn_ln_weight_to_fp16 = const()[name = string("blocks_1_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62865088)))]; tensor blocks_1_attn_ln_bias_to_fp16 = const()[name = string("blocks_1_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62866176)))]; fp16 var_293_to_fp16 = const()[name = string("op_293_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_305_cast_fp16 = layer_norm(axes = var_305_axes_0, beta = blocks_1_attn_ln_bias_to_fp16, epsilon = var_293_to_fp16, gamma = blocks_1_attn_ln_weight_to_fp16, x = x_21_cast_fp16)[name = string("op_305_cast_fp16")]; tensor var_316_to_fp16 = const()[name = string("op_316_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62867264)))]; tensor var_317_to_fp16 = const()[name = string("op_317_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63391616)))]; tensor linear_8_cast_fp16 = linear(bias = var_317_to_fp16, weight = var_316_to_fp16, x = var_305_cast_fp16)[name = string("linear_8_cast_fp16")]; tensor var_320_to_fp16 = const()[name = string("op_320_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63392704)))]; tensor linear_9_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_320_to_fp16, x = var_305_cast_fp16)[name = string("linear_9_cast_fp16")]; tensor var_324_to_fp16 = const()[name = string("op_324_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63917056)))]; tensor var_325_to_fp16 = const()[name = string("op_325_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64441408)))]; tensor linear_10_cast_fp16 = linear(bias = var_325_to_fp16, weight = var_324_to_fp16, x = var_305_cast_fp16)[name = string("linear_10_cast_fp16")]; tensor var_327_shape_cast_fp16 = shape(x = linear_8_cast_fp16)[name = string("op_327_shape_cast_fp16")]; int32 gather_14_axis_0 = const()[name = string("gather_14_axis_0"), val = int32(0)]; int32 gather_14_batch_dims_0 = const()[name = string("gather_14_batch_dims_0"), val = int32(0)]; bool gather_14_validate_indices_0 = const()[name = string("gather_14_validate_indices_0"), val = bool(false)]; string var_327_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_327_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_14_to_uint16 = const()[name = string("select_14_to_uint16"), val = uint16(1)]; tensor var_327_shape_cast_fp16_to_uint16 = cast(dtype = var_327_shape_cast_fp16_to_uint16_dtype_0, x = var_327_shape_cast_fp16)[name = string("cast_76")]; uint16 gather_14_cast_uint16 = gather(axis = gather_14_axis_0, batch_dims = gather_14_batch_dims_0, indices = select_14_to_uint16, validate_indices = gather_14_validate_indices_0, x = var_327_shape_cast_fp16_to_uint16)[name = string("gather_14_cast_uint16")]; string gather_14_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_14_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_14_cast_uint16_to_int32 = cast(dtype = gather_14_cast_uint16_to_int32_dtype_0, x = gather_14_cast_uint16)[name = string("cast_75")]; int32 end_step_5 = add(x = offset, y = gather_14_cast_uint16_to_int32)[name = string("end_step_5")]; tensor expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor([0])]; tensor expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor([0])]; tensor expand_dims_19_axes_0 = const()[name = string("expand_dims_19_axes_0"), val = tensor([0])]; tensor expand_dims_19 = expand_dims(axes = expand_dims_19_axes_0, x = end_step_5)[name = string("expand_dims_19")]; tensor concat_26_values0_0 = const()[name = string("concat_26_values0_0"), val = tensor([1])]; int32 concat_26_axis_0 = const()[name = string("concat_26_axis_0"), val = int32(0)]; bool concat_26_interleave_0 = const()[name = string("concat_26_interleave_0"), val = bool(false)]; tensor concat_26 = concat(axis = concat_26_axis_0, interleave = concat_26_interleave_0, values = (concat_26_values0_0, expand_dims_16, expand_dims_1, expand_dims_18))[name = string("concat_26")]; tensor concat_27_values0_0 = const()[name = string("concat_27_values0_0"), val = tensor([0])]; tensor concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor([0])]; tensor concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor([0])]; int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)]; bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)]; tensor concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (concat_27_values0_0, concat_27_values1_0, expand_dims_19, concat_27_values3_0))[name = string("concat_27")]; tensor k_cache1_internal_tensor_assign_2_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_2_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_2_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_26, begin_mask = k_cache1_internal_tensor_assign_2_begin_mask_0, end = concat_27, end_mask = k_cache1_internal_tensor_assign_2_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_2_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_2_stride_0, update = linear_9_cast_fp16, x = coreml_update_state_12)[name = string("k_cache1_internal_tensor_assign_2_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_2_cast_fp16, input = k_cache1)[name = string("coreml_update_state_14_write_state")]; tensor coreml_update_state_14 = read_state(input = k_cache1)[name = string("coreml_update_state_14")]; tensor v_cache1_internal_tensor_assign_2_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_2_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_2_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_26, begin_mask = v_cache1_internal_tensor_assign_2_begin_mask_0, end = concat_27, end_mask = v_cache1_internal_tensor_assign_2_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_2_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_2_stride_0, update = linear_10_cast_fp16, x = coreml_update_state_13)[name = string("v_cache1_internal_tensor_assign_2_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_2_cast_fp16, input = v_cache1)[name = string("coreml_update_state_15_write_state")]; tensor coreml_update_state_15 = read_state(input = v_cache1)[name = string("coreml_update_state_15")]; int32 concat_32_values0_0 = const()[name = string("concat_32_values0_0"), val = int32(1)]; int32 concat_32_values2_0 = const()[name = string("concat_32_values2_0"), val = int32(512)]; int32 concat_32_axis_0 = const()[name = string("concat_32_axis_0"), val = int32(0)]; bool concat_32_interleave_0 = const()[name = string("concat_32_interleave_0"), val = bool(false)]; tensor concat_32 = concat(axis = concat_32_axis_0, interleave = concat_32_interleave_0, values = (concat_32_values0_0, end_step_5, concat_32_values2_0))[name = string("concat_32")]; tensor var_343_begin_0 = const()[name = string("op_343_begin_0"), val = tensor([0, 0, 0])]; tensor var_343_end_mask_0 = const()[name = string("op_343_end_mask_0"), val = tensor([true, false, true])]; tensor var_343_cast_fp16 = slice_by_index(begin = var_343_begin_0, end = concat_32, end_mask = var_343_end_mask_0, x = k_cache_5_cast_fp16)[name = string("op_343_cast_fp16")]; tensor var_346_begin_0 = const()[name = string("op_346_begin_0"), val = tensor([0, 0, 0])]; tensor var_346_end_mask_0 = const()[name = string("op_346_end_mask_0"), val = tensor([true, false, true])]; tensor var_346_cast_fp16 = slice_by_index(begin = var_346_begin_0, end = concat_32, end_mask = var_346_end_mask_0, x = v_cache_5_cast_fp16)[name = string("op_346_cast_fp16")]; tensor concat_34x = const()[name = string("concat_34x"), val = tensor([1, -1, 8, 64])]; tensor