program(1.3) [buildInfo = dict({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}, {"coremltools-component-torch", "2.4.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0"}})] { func main(state> k_cache1, state> k_cache2, tensor offset_mask, tensor token_data, state> v_cache1, state> v_cache2) [FlexibleShapeInformation = tuple>>, tuple, ?>>>>((("DefaultShapes", {{"offset_mask", [1, 1]}, {"token_data", [1, 1]}}), ("RangeDims", {{"offset_mask", [[1, 1], [1, 448]]}, {"token_data", [[1, 1], [1, 448]]}})))] { tensor var_62_shape_cast_fp16 = shape(x = offset_mask)[name = string("op_62_shape_cast_fp16")]; int32 gather_0_axis_0 = const()[name = string("gather_0_axis_0"), val = int32(0)]; int32 gather_0_batch_dims_0 = const()[name = string("gather_0_batch_dims_0"), val = int32(0)]; bool gather_0_validate_indices_0 = const()[name = string("gather_0_validate_indices_0"), val = bool(false)]; string var_62_shape_cast_fp16_to_int16_dtype_0 = const()[name = string("op_62_shape_cast_fp16_to_int16_dtype_0"), val = string("int16")]; uint16 select_0_to_uint16 = const()[name = string("select_0_to_uint16"), val = uint16(1)]; tensor var_62_shape_cast_fp16_to_int16 = cast(dtype = var_62_shape_cast_fp16_to_int16_dtype_0, x = var_62_shape_cast_fp16)[name = string("cast_298")]; int16 gather_0_cast_uint16 = gather(axis = gather_0_axis_0, batch_dims = gather_0_batch_dims_0, indices = select_0_to_uint16, validate_indices = gather_0_validate_indices_0, x = var_62_shape_cast_fp16_to_int16)[name = string("gather_0_cast_uint16")]; string gather_0_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_0_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor var_66_shape = shape(x = token_data)[name = string("op_66_shape")]; int32 gather_1_axis_0 = const()[name = string("gather_1_axis_0"), val = int32(0)]; int32 gather_1_batch_dims_0 = const()[name = string("gather_1_batch_dims_0"), val = int32(0)]; bool gather_1_validate_indices_0 = const()[name = string("gather_1_validate_indices_0"), val = bool(false)]; string var_66_shape_to_uint16_dtype_0 = const()[name = string("op_66_shape_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_1_to_uint16 = const()[name = string("select_1_to_uint16"), val = uint16(1)]; tensor var_66_shape_to_uint16 = cast(dtype = var_66_shape_to_uint16_dtype_0, x = var_66_shape)[name = string("cast_296")]; uint16 gather_1_cast_uint16 = gather(axis = gather_1_axis_0, batch_dims = gather_1_batch_dims_0, indices = select_1_to_uint16, validate_indices = gather_1_validate_indices_0, x = var_66_shape_to_uint16)[name = string("gather_1_cast_uint16")]; string gather_1_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_1_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_1_cast_uint16_to_int32 = cast(dtype = gather_1_cast_uint16_to_int32_dtype_0, x = gather_1_cast_uint16)[name = string("cast_295")]; int32 gather_0_cast_uint16_to_int32 = cast(dtype = gather_0_cast_uint16_to_int32_dtype_0, x = gather_0_cast_uint16)[name = string("cast_297")]; int32 offset = sub(x = gather_0_cast_uint16_to_int32, y = gather_1_cast_uint16_to_int32)[name = string("offset")]; int32 var_122_axis_0 = const()[name = string("op_122_axis_0"), val = int32(0)]; int32 var_122_batch_dims_0 = const()[name = string("op_122_batch_dims_0"), val = int32(0)]; bool var_122_validate_indices_0 = const()[name = string("op_122_validate_indices_0"), val = bool(false)]; tensor token_embedding_weight_to_fp16 = const()[name = string("token_embedding_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; tensor var_122_cast_fp16 = gather(axis = var_122_axis_0, batch_dims = var_122_batch_dims_0, indices = token_data, validate_indices = var_122_validate_indices_0, x = token_embedding_weight_to_fp16)[name = string("op_122_cast_fp16")]; int32 concat_0_values1_0 = const()[name = string("concat_0_values1_0"), val = int32(0)]; int32 concat_0_axis_0 = const()[name = string("concat_0_axis_0"), val = int32(0)]; bool concat_0_interleave_0 = const()[name = string("concat_0_interleave_0"), val = bool(false)]; tensor concat_0 = concat(axis = concat_0_axis_0, interleave = concat_0_interleave_0, values = (offset, concat_0_values1_0))[name = string("concat_0")]; int32 concat_1_values1_0 = const()[name = string("concat_1_values1_0"), val = int32(1024)]; int32 concat_1_axis_0 = const()[name = string("concat_1_axis_0"), val = int32(0)]; bool concat_1_interleave_0 = const()[name = string("concat_1_interleave_0"), val = bool(false)]; tensor concat_1 = concat(axis = concat_1_axis_0, interleave = concat_1_interleave_0, values = (gather_0_cast_uint16_to_int32, concat_1_values1_0))[name = string("concat_1")]; tensor var_125_end_mask_0 = const()[name = string("op_125_end_mask_0"), val = tensor([false, true])]; tensor positional_embedding_to_fp16 = const()[name = string("positional_embedding_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(106219648)))]; tensor var_125_cast_fp16 = slice_by_index(begin = concat_0, end = concat_1, end_mask = var_125_end_mask_0, x = positional_embedding_to_fp16)[name = string("op_125_cast_fp16")]; tensor x_3_cast_fp16 = add(x = var_122_cast_fp16, y = var_125_cast_fp16)[name = string("x_3_cast_fp16")]; tensor read_state_0 = read_state(input = k_cache1)[name = string("read_state_0")]; tensor k_cache_1_begin_0 = const()[name = string("k_cache_1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor k_cache_1_end_0 = const()[name = string("k_cache_1_end_0"), val = tensor([1, 1, 448, 1024])]; tensor k_cache_1_end_mask_0 = const()[name = string("k_cache_1_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_1_squeeze_mask_0 = const()[name = string("k_cache_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_1_cast_fp16 = slice_by_index(begin = k_cache_1_begin_0, end = k_cache_1_end_0, end_mask = k_cache_1_end_mask_0, squeeze_mask = k_cache_1_squeeze_mask_0, x = read_state_0)[name = string("k_cache_1_cast_fp16")]; tensor read_state_1 = read_state(input = v_cache1)[name = string("read_state_1")]; tensor v_cache_1_begin_0 = const()[name = string("v_cache_1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor v_cache_1_end_0 = const()[name = string("v_cache_1_end_0"), val = tensor([1, 1, 448, 1024])]; tensor v_cache_1_end_mask_0 = const()[name = string("v_cache_1_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_1_squeeze_mask_0 = const()[name = string("v_cache_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_1_cast_fp16 = slice_by_index(begin = v_cache_1_begin_0, end = v_cache_1_end_0, end_mask = v_cache_1_end_mask_0, squeeze_mask = v_cache_1_squeeze_mask_0, x = read_state_1)[name = string("v_cache_1_cast_fp16")]; tensor read_state_2 = read_state(input = k_cache2)[name = string("read_state_2")]; tensor k_cache_3_begin_0 = const()[name = string("k_cache_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor k_cache_3_end_0 = const()[name = string("k_cache_3_end_0"), val = tensor([1, 1, 1500, 1024])]; tensor k_cache_3_end_mask_0 = const()[name = string("k_cache_3_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_3_squeeze_mask_0 = const()[name = string("k_cache_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_3_cast_fp16 = slice_by_index(begin = k_cache_3_begin_0, end = k_cache_3_end_0, end_mask = k_cache_3_end_mask_0, squeeze_mask = k_cache_3_squeeze_mask_0, x = read_state_2)[name = string("k_cache_3_cast_fp16")]; tensor read_state_3 = read_state(input = v_cache2)[name = string("read_state_3")]; tensor v_cache_3_begin_0 = const()[name = string("v_cache_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor v_cache_3_end_0 = const()[name = string("v_cache_3_end_0"), val = tensor([1, 1, 1500, 1024])]; tensor v_cache_3_end_mask_0 = const()[name = string("v_cache_3_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_3_squeeze_mask_0 = const()[name = string("v_cache_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_3_cast_fp16 = slice_by_index(begin = v_cache_3_begin_0, end = v_cache_3_end_0, end_mask = v_cache_3_end_mask_0, squeeze_mask = v_cache_3_squeeze_mask_0, x = read_state_3)[name = string("v_cache_3_cast_fp16")]; int32 var_148 = const()[name = string("op_148"), val = int32(-1)]; tensor var_166_axes_0 = const()[name = string("op_166_axes_0"), val = tensor([-1])]; tensor blocks_0_attn_ln_weight_to_fp16 = const()[name = string("blocks_0_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107137216)))]; tensor blocks_0_attn_ln_bias_to_fp16 = const()[name = string("blocks_0_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107139328)))]; fp16 var_154_to_fp16 = const()[name = string("op_154_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_166_cast_fp16 = layer_norm(axes = var_166_axes_0, beta = blocks_0_attn_ln_bias_to_fp16, epsilon = var_154_to_fp16, gamma = blocks_0_attn_ln_weight_to_fp16, x = x_3_cast_fp16)[name = string("op_166_cast_fp16")]; tensor var_177_to_fp16 = const()[name = string("op_177_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107141440)))]; tensor var_178_to_fp16 = const()[name = string("op_178_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109238656)))]; tensor linear_0_cast_fp16 = linear(bias = var_178_to_fp16, weight = var_177_to_fp16, x = var_166_cast_fp16)[name = string("linear_0_cast_fp16")]; tensor var_181_to_fp16 = const()[name = string("op_181_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109240768)))]; tensor linear_1_bias_0_to_fp16 = const()[name = string("linear_1_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111337984)))]; tensor linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_181_to_fp16, x = var_166_cast_fp16)[name = string("linear_1_cast_fp16")]; tensor var_185_to_fp16 = const()[name = string("op_185_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111340096)))]; tensor var_186_to_fp16 = const()[name = string("op_186_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113437312)))]; tensor linear_2_cast_fp16 = linear(bias = var_186_to_fp16, weight = var_185_to_fp16, x = var_166_cast_fp16)[name = string("linear_2_cast_fp16")]; tensor var_188_shape_cast_fp16 = shape(x = linear_0_cast_fp16)[name = string("op_188_shape_cast_fp16")]; int32 gather_2_axis_0 = const()[name = string("gather_2_axis_0"), val = int32(0)]; int32 gather_2_batch_dims_0 = const()[name = string("gather_2_batch_dims_0"), val = int32(0)]; bool gather_2_validate_indices_0 = const()[name = string("gather_2_validate_indices_0"), val = bool(false)]; string var_188_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_188_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_2_to_uint16 = const()[name = string("select_2_to_uint16"), val = uint16(1)]; tensor var_188_shape_cast_fp16_to_uint16 = cast(dtype = var_188_shape_cast_fp16_to_uint16_dtype_0, x = var_188_shape_cast_fp16)[name = string("cast_294")]; uint16 gather_2_cast_uint16 = gather(axis = gather_2_axis_0, batch_dims = gather_2_batch_dims_0, indices = select_2_to_uint16, validate_indices = gather_2_validate_indices_0, x = var_188_shape_cast_fp16_to_uint16)[name = string("gather_2_cast_uint16")]; string gather_2_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_2_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_2_cast_uint16_to_int32 = cast(dtype = gather_2_cast_uint16_to_int32_dtype_0, x = gather_2_cast_uint16)[name = string("cast_293")]; int32 end_step_3 = add(x = offset, y = gather_2_cast_uint16_to_int32)[name = string("end_step_3")]; tensor expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor([0])]; tensor expand_dims_1_axes_0 = const()[name = string("expand_dims_1_axes_0"), val = tensor([0])]; tensor expand_dims_1 = expand_dims(axes = expand_dims_1_axes_0, x = offset)[name = string("expand_dims_1")]; tensor expand_dims_2 = const()[name = string("expand_dims_2"), val = tensor([0])]; tensor expand_dims_3_axes_0 = const()[name = string("expand_dims_3_axes_0"), val = tensor([0])]; tensor expand_dims_3 = expand_dims(axes = expand_dims_3_axes_0, x = end_step_3)[name = string("expand_dims_3")]; tensor concat_4_values0_0 = const()[name = string("concat_4_values0_0"), val = tensor([0])]; int32 concat_4_axis_0 = const()[name = string("concat_4_axis_0"), val = int32(0)]; bool concat_4_interleave_0 = const()[name = string("concat_4_interleave_0"), val = bool(false)]; tensor concat_4 = concat(axis = concat_4_axis_0, interleave = concat_4_interleave_0, values = (concat_4_values0_0, expand_dims_0, expand_dims_1, expand_dims_2))[name = string("concat_4")]; tensor concat_5_values0_0 = const()[name = string("concat_5_values0_0"), val = tensor([0])]; tensor concat_5_values1_0 = const()[name = string("concat_5_values1_0"), val = tensor([0])]; tensor concat_5_values3_0 = const()[name = string("concat_5_values3_0"), val = tensor([0])]; int32 concat_5_axis_0 = const()[name = string("concat_5_axis_0"), val = int32(0)]; bool concat_5_interleave_0 = const()[name = string("concat_5_interleave_0"), val = bool(false)]; tensor concat_5 = concat(axis = concat_5_axis_0, interleave = concat_5_interleave_0, values = (concat_5_values0_0, concat_5_values1_0, expand_dims_3, concat_5_values3_0))[name = string("concat_5")]; tensor k_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_4, begin_mask = k_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_5, end_mask = k_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_1_stride_0, update = linear_1_cast_fp16, x = read_state_0)[name = string("k_cache1_internal_tensor_assign_1_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_1_cast_fp16, input = k_cache1)[name = string("coreml_update_state_48_write_state")]; tensor coreml_update_state_48 = read_state(input = k_cache1)[name = string("coreml_update_state_48")]; tensor v_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_4, begin_mask = v_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_5, end_mask = v_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_1_stride_0, update = linear_2_cast_fp16, x = read_state_1)[name = string("v_cache1_internal_tensor_assign_1_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_1_cast_fp16, input = v_cache1)[name = string("coreml_update_state_49_write_state")]; tensor coreml_update_state_49 = read_state(input = v_cache1)[name = string("coreml_update_state_49")]; int32 concat_10_values0_0 = const()[name = string("concat_10_values0_0"), val = int32(1)]; int32 concat_10_values2_0 = const()[name = string("concat_10_values2_0"), val = int32(1024)]; int32 concat_10_axis_0 = const()[name = string("concat_10_axis_0"), val = int32(0)]; bool concat_10_interleave_0 = const()[name = string("concat_10_interleave_0"), val = bool(false)]; tensor concat_10 = concat(axis = concat_10_axis_0, interleave = concat_10_interleave_0, values = (concat_10_values0_0, end_step_3, concat_10_values2_0))[name = string("concat_10")]; tensor var_204_begin_0 = const()[name = string("op_204_begin_0"), val = tensor([0, 0, 0])]; tensor var_204_end_mask_0 = const()[name = string("op_204_end_mask_0"), val = tensor([true, false, true])]; tensor var_204_cast_fp16 = slice_by_index(begin = var_204_begin_0, end = concat_10, end_mask = var_204_end_mask_0, x = k_cache_1_cast_fp16)[name = string("op_204_cast_fp16")]; tensor var_207_begin_0 = const()[name = string("op_207_begin_0"), val = tensor([0, 0, 0])]; tensor var_207_end_mask_0 = const()[name = string("op_207_end_mask_0"), val = tensor([true, false, true])]; tensor var_207_cast_fp16 = slice_by_index(begin = var_207_begin_0, end = concat_10, end_mask = var_207_end_mask_0, x = v_cache_1_cast_fp16)[name = string("op_207_cast_fp16")]; tensor concat_12x = const()[name = string("concat_12x"), val = tensor([1, -1, 16, 64])]; tensor var_217_cast_fp16 = reshape(shape = concat_12x, x = linear_0_cast_fp16)[name = string("op_217_cast_fp16")]; tensor const_120_to_fp16 = const()[name = string("const_120_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_3_cast_fp16 = mul(x = var_217_cast_fp16, y = const_120_to_fp16)[name = string("q_3_cast_fp16")]; tensor concat_13x = const()[name = string("concat_13x"), val = tensor([1, -1, 16, 64])]; tensor var_224_cast_fp16 = reshape(shape = concat_13x, x = var_204_cast_fp16)[name = string("op_224_cast_fp16")]; tensor const_121_to_fp16 = const()[name = string("const_121_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_5_cast_fp16 = mul(x = var_224_cast_fp16, y = const_121_to_fp16)[name = string("k_5_cast_fp16")]; tensor concat_14x = const()[name = string("concat_14x"), val = tensor([1, -1, 16, 64])]; tensor var_231_cast_fp16 = reshape(shape = concat_14x, x = var_207_cast_fp16)[name = string("op_231_cast_fp16")]; tensor var_232 = const()[name = string("op_232"), val = tensor([0, 2, 1, 3])]; bool qk_1_transpose_x_0 = const()[name = string("qk_1_transpose_x_0"), val = bool(false)]; bool qk_1_transpose_y_0 = const()[name = string("qk_1_transpose_y_0"), val = bool(false)]; tensor transpose_193_perm_0 = const()[name = string("transpose_193_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_194_perm_0 = const()[name = string("transpose_194_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_194 = transpose(perm = transpose_194_perm_0, x = k_5_cast_fp16)[name = string("transpose_478")]; tensor transpose_193 = transpose(perm = transpose_193_perm_0, x = q_3_cast_fp16)[name = string("transpose_479")]; tensor qk_1_cast_fp16 = matmul(transpose_x = qk_1_transpose_x_0, transpose_y = qk_1_transpose_y_0, x = transpose_193, y = transpose_194)[name = string("qk_1_cast_fp16")]; int32 concat_15_values1_0 = const()[name = string("concat_15_values1_0"), val = int32(448)]; int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)]; bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)]; tensor concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (gather_2_cast_uint16_to_int32, concat_15_values1_0))[name = string("concat_15")]; tensor var_235_begin_0 = const()[name = string("op_235_begin_0"), val = tensor([0, 0])]; tensor var_235_end_mask_0 = const()[name = string("op_235_end_mask_0"), val = tensor([false, true])]; tensor mask_to_fp16 = const()[name = string("mask_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113439424)))]; tensor var_235_cast_fp16 = slice_by_index(begin = var_235_begin_0, end = concat_15, end_mask = var_235_end_mask_0, x = mask_to_fp16)[name = string("op_235_cast_fp16")]; int32 concat_16_values0_0 = const()[name = string("concat_16_values0_0"), val = int32(0)]; int32 concat_16_axis_0 = const()[name = string("concat_16_axis_0"), val = int32(0)]; bool concat_16_interleave_0 = const()[name = string("concat_16_interleave_0"), val = bool(false)]; tensor concat_16 = concat(axis = concat_16_axis_0, interleave = concat_16_interleave_0, values = (concat_16_values0_0, gather_2_cast_uint16_to_int32))[name = string("concat_16")]; tensor var_236_begin_0 = const()[name = string("op_236_begin_0"), val = tensor([0, 0])]; tensor var_236_end_mask_0 = const()[name = string("op_236_end_mask_0"), val = tensor([true, false])]; tensor var_236_cast_fp16 = slice_by_index(begin = var_236_begin_0, end = concat_16, end_mask = var_236_end_mask_0, x = var_235_cast_fp16)[name = string("op_236_cast_fp16")]; tensor qk_3_cast_fp16 = add(x = qk_1_cast_fp16, y = var_236_cast_fp16)[name = string("qk_3_cast_fp16")]; tensor var_239_cast_fp16 = softmax(axis = var_148, x = qk_3_cast_fp16)[name = string("op_239_cast_fp16")]; bool var_241_transpose_x_0 = const()[name = string("op_241_transpose_x_0"), val = bool(false)]; bool var_241_transpose_y_0 = const()[name = string("op_241_transpose_y_0"), val = bool(false)]; tensor v_5_cast_fp16 = transpose(perm = var_232, x = var_231_cast_fp16)[name = string("transpose_480")]; tensor var_241_cast_fp16 = matmul(transpose_x = var_241_transpose_x_0, transpose_y = var_241_transpose_y_0, x = var_239_cast_fp16, y = v_5_cast_fp16)[name = string("op_241_cast_fp16")]; tensor var_242 = const()[name = string("op_242"), val = tensor([0, 2, 1, 3])]; tensor concat_17x = const()[name = string("concat_17x"), val = tensor([1, -1, 1024])]; tensor var_243_cast_fp16 = transpose(perm = var_242, x = var_241_cast_fp16)[name = string("transpose_477")]; tensor x_7_cast_fp16 = reshape(shape = concat_17x, x = var_243_cast_fp16)[name = string("x_7_cast_fp16")]; tensor var_247_to_fp16 = const()[name = string("op_247_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113840896)))]; tensor var_248_to_fp16 = const()[name = string("op_248_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115938112)))]; tensor linear_3_cast_fp16 = linear(bias = var_248_to_fp16, weight = var_247_to_fp16, x = x_7_cast_fp16)[name = string("linear_3_cast_fp16")]; tensor x_9_cast_fp16 = add(x = x_3_cast_fp16, y = linear_3_cast_fp16)[name = string("x_9_cast_fp16")]; tensor var_255_axes_0 = const()[name = string("op_255_axes_0"), val = tensor([-1])]; tensor blocks_0_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_0_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115940224)))]; tensor blocks_0_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_0_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115942336)))]; tensor var_255_cast_fp16 = layer_norm(axes = var_255_axes_0, beta = blocks_0_cross_attn_ln_bias_to_fp16, epsilon = var_154_to_fp16, gamma = blocks_0_cross_attn_ln_weight_to_fp16, x = x_9_cast_fp16)[name = string("op_255_cast_fp16")]; tensor var_264_to_fp16 = const()[name = string("op_264_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115944448)))]; tensor var_265_to_fp16 = const()[name = string("op_265_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118041664)))]; tensor linear_4_cast_fp16 = linear(bias = var_265_to_fp16, weight = var_264_to_fp16, x = var_255_cast_fp16)[name = string("linear_4_cast_fp16")]; tensor concat_18 = const()[name = string("concat_18"), val = tensor([0, 0, 0])]; tensor concat_19 = const()[name = string("concat_19"), val = tensor([0, 1500, 0])]; tensor k_7_internal_tensor_assign_1_stride_0 = const()[name = string("k_7_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_7_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_7_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_7_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_7_to_fp16 = const()[name = string("k_7_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118043776)))]; tensor k_7_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_18, begin_mask = k_7_internal_tensor_assign_1_begin_mask_0, end = concat_19, end_mask = k_7_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_7_internal_tensor_assign_1_squeeze_mask_0, stride = k_7_internal_tensor_assign_1_stride_0, update = k_cache_3_cast_fp16, x = k_7_to_fp16)[name = string("k_7_internal_tensor_assign_1_cast_fp16")]; tensor concat_20 = const()[name = string("concat_20"), val = tensor([0, 0, 0])]; tensor concat_21 = const()[name = string("concat_21"), val = tensor([0, 1500, 0])]; tensor v_7_internal_tensor_assign_1_stride_0 = const()[name = string("v_7_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_7_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_7_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_7_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_7_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_20, begin_mask = v_7_internal_tensor_assign_1_begin_mask_0, end = concat_21, end_mask = v_7_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_7_internal_tensor_assign_1_squeeze_mask_0, stride = v_7_internal_tensor_assign_1_stride_0, update = v_cache_3_cast_fp16, x = k_7_to_fp16)[name = string("v_7_internal_tensor_assign_1_cast_fp16")]; tensor concat_22x = const()[name = string("concat_22x"), val = tensor([1, -1, 16, 64])]; tensor var_285_cast_fp16 = reshape(shape = concat_22x, x = linear_4_cast_fp16)[name = string("op_285_cast_fp16")]; tensor const_122_to_fp16 = const()[name = string("const_122_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_7_cast_fp16 = mul(x = var_285_cast_fp16, y = const_122_to_fp16)[name = string("q_7_cast_fp16")]; tensor var_291 = const()[name = string("op_291"), val = tensor([1, 1500, 16, -1])]; tensor var_292_cast_fp16 = reshape(shape = var_291, x = k_7_internal_tensor_assign_1_cast_fp16)[name = string("op_292_cast_fp16")]; tensor const_123_to_fp16 = const()[name = string("const_123_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_9_cast_fp16 = mul(x = var_292_cast_fp16, y = const_123_to_fp16)[name = string("k_9_cast_fp16")]; tensor var_298 = const()[name = string("op_298"), val = tensor([1, 1500, 16, -1])]; tensor var_299_cast_fp16 = reshape(shape = var_298, x = v_7_internal_tensor_assign_1_cast_fp16)[name = string("op_299_cast_fp16")]; tensor var_300 = const()[name = string("op_300"), val = tensor([0, 2, 1, 3])]; bool qk_5_transpose_x_0 = const()[name = string("qk_5_transpose_x_0"), val = bool(false)]; bool qk_5_transpose_y_0 = const()[name = string("qk_5_transpose_y_0"), val = bool(false)]; tensor transpose_195_perm_0 = const()[name = string("transpose_195_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_196_perm_0 = const()[name = string("transpose_196_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_196 = transpose(perm = transpose_196_perm_0, x = k_9_cast_fp16)[name = string("transpose_474")]; tensor transpose_195 = transpose(perm = transpose_195_perm_0, x = q_7_cast_fp16)[name = string("transpose_475")]; tensor qk_5_cast_fp16 = matmul(transpose_x = qk_5_transpose_x_0, transpose_y = qk_5_transpose_y_0, x = transpose_195, y = transpose_196)[name = string("qk_5_cast_fp16")]; tensor var_304_cast_fp16 = softmax(axis = var_148, x = qk_5_cast_fp16)[name = string("op_304_cast_fp16")]; bool var_306_transpose_x_0 = const()[name = string("op_306_transpose_x_0"), val = bool(false)]; bool var_306_transpose_y_0 = const()[name = string("op_306_transpose_y_0"), val = bool(false)]; tensor v_9_cast_fp16 = transpose(perm = var_300, x = var_299_cast_fp16)[name = string("transpose_476")]; tensor var_306_cast_fp16 = matmul(transpose_x = var_306_transpose_x_0, transpose_y = var_306_transpose_y_0, x = var_304_cast_fp16, y = v_9_cast_fp16)[name = string("op_306_cast_fp16")]; tensor var_307 = const()[name = string("op_307"), val = tensor([0, 2, 1, 3])]; tensor concat_23x = const()[name = string("concat_23x"), val = tensor([1, -1, 1024])]; tensor var_308_cast_fp16 = transpose(perm = var_307, x = var_306_cast_fp16)[name = string("transpose_473")]; tensor x_13_cast_fp16 = reshape(shape = concat_23x, x = var_308_cast_fp16)[name = string("x_13_cast_fp16")]; tensor var_312_to_fp16 = const()[name = string("op_312_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121115840)))]; tensor var_313_to_fp16 = const()[name = string("op_313_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123213056)))]; tensor linear_5_cast_fp16 = linear(bias = var_313_to_fp16, weight = var_312_to_fp16, x = x_13_cast_fp16)[name = string("linear_5_cast_fp16")]; tensor x_15_cast_fp16 = add(x = x_9_cast_fp16, y = linear_5_cast_fp16)[name = string("x_15_cast_fp16")]; tensor var_320_axes_0 = const()[name = string("op_320_axes_0"), val = tensor([-1])]; tensor blocks_0_mlp_ln_weight_to_fp16 = const()[name = string("blocks_0_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123215168)))]; tensor blocks_0_mlp_ln_bias_to_fp16 = const()[name = string("blocks_0_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123217280)))]; tensor var_320_cast_fp16 = layer_norm(axes = var_320_axes_0, beta = blocks_0_mlp_ln_bias_to_fp16, epsilon = var_154_to_fp16, gamma = blocks_0_mlp_ln_weight_to_fp16, x = x_15_cast_fp16)[name = string("op_320_cast_fp16")]; tensor var_329_to_fp16 = const()[name = string("op_329_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123219392)))]; tensor var_330_to_fp16 = const()[name = string("op_330_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(131608064)))]; tensor linear_6_cast_fp16 = linear(bias = var_330_to_fp16, weight = var_329_to_fp16, x = var_320_cast_fp16)[name = string("linear_6_cast_fp16")]; string x_19_mode_0 = const()[name = string("x_19_mode_0"), val = string("EXACT")]; tensor x_19_cast_fp16 = gelu(mode = x_19_mode_0, x = linear_6_cast_fp16)[name = string("x_19_cast_fp16")]; tensor var_335_to_fp16 = const()[name = string("op_335_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(131616320)))]; tensor var_336_to_fp16 = const()[name = string("op_336_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140004992)))]; tensor linear_7_cast_fp16 = linear(bias = var_336_to_fp16, weight = var_335_to_fp16, x = x_19_cast_fp16)[name = string("linear_7_cast_fp16")]; tensor x_21_cast_fp16 = add(x = x_15_cast_fp16, y = linear_7_cast_fp16)[name = string("x_21_cast_fp16")]; tensor k_cache_5_begin_0 = const()[name = string("k_cache_5_begin_0"), val = tensor([1, 0, 0, 0])]; tensor k_cache_5_end_0 = const()[name = string("k_cache_5_end_0"), val = tensor([2, 1, 448, 1024])]; tensor k_cache_5_end_mask_0 = const()[name = string("k_cache_5_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_5_squeeze_mask_0 = const()[name = string("k_cache_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_5_cast_fp16 = slice_by_index(begin = k_cache_5_begin_0, end = k_cache_5_end_0, end_mask = k_cache_5_end_mask_0, squeeze_mask = k_cache_5_squeeze_mask_0, x = coreml_update_state_48)[name = string("k_cache_5_cast_fp16")]; tensor v_cache_5_begin_0 = const()[name = string("v_cache_5_begin_0"), val = tensor([1, 0, 0, 0])]; tensor v_cache_5_end_0 = const()[name = string("v_cache_5_end_0"), val = tensor([2, 1, 448, 1024])]; tensor v_cache_5_end_mask_0 = const()[name = string("v_cache_5_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_5_squeeze_mask_0 = const()[name = string("v_cache_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_5_cast_fp16 = slice_by_index(begin = v_cache_5_begin_0, end = v_cache_5_end_0, end_mask = v_cache_5_end_mask_0, squeeze_mask = v_cache_5_squeeze_mask_0, x = coreml_update_state_49)[name = string("v_cache_5_cast_fp16")]; tensor k_cache_7_begin_0 = const()[name = string("k_cache_7_begin_0"), val = tensor([1, 0, 0, 0])]; tensor k_cache_7_end_0 = const()[name = string("k_cache_7_end_0"), val = tensor([2, 1, 1500, 1024])]; tensor k_cache_7_end_mask_0 = const()[name = string("k_cache_7_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_7_squeeze_mask_0 = const()[name = string("k_cache_7_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_7_cast_fp16 = slice_by_index(begin = k_cache_7_begin_0, end = k_cache_7_end_0, end_mask = k_cache_7_end_mask_0, squeeze_mask = k_cache_7_squeeze_mask_0, x = read_state_2)[name = string("k_cache_7_cast_fp16")]; tensor v_cache_7_begin_0 = const()[name = string("v_cache_7_begin_0"), val = tensor([1, 0, 0, 0])]; tensor v_cache_7_end_0 = const()[name = string("v_cache_7_end_0"), val = tensor([2, 1, 1500, 1024])]; tensor v_cache_7_end_mask_0 = const()[name = string("v_cache_7_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_7_squeeze_mask_0 = const()[name = string("v_cache_7_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_7_cast_fp16 = slice_by_index(begin = v_cache_7_begin_0, end = v_cache_7_end_0, end_mask = v_cache_7_end_mask_0, squeeze_mask = v_cache_7_squeeze_mask_0, x = read_state_3)[name = string("v_cache_7_cast_fp16")]; int32 var_359 = const()[name = string("op_359"), val = int32(-1)]; tensor var_377_axes_0 = const()[name = string("op_377_axes_0"), val = tensor([-1])]; tensor blocks_1_attn_ln_weight_to_fp16 = const()[name = string("blocks_1_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140007104)))]; tensor blocks_1_attn_ln_bias_to_fp16 = const()[name = string("blocks_1_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140009216)))]; fp16 var_365_to_fp16 = const()[name = string("op_365_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_377_cast_fp16 = layer_norm(axes = var_377_axes_0, beta = blocks_1_attn_ln_bias_to_fp16, epsilon = var_365_to_fp16, gamma = blocks_1_attn_ln_weight_to_fp16, x = x_21_cast_fp16)[name = string("op_377_cast_fp16")]; tensor var_388_to_fp16 = const()[name = string("op_388_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140011328)))]; tensor var_389_to_fp16 = const()[name = string("op_389_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142108544)))]; tensor linear_8_cast_fp16 = linear(bias = var_389_to_fp16, weight = var_388_to_fp16, x = var_377_cast_fp16)[name = string("linear_8_cast_fp16")]; tensor var_392_to_fp16 = const()[name = string("op_392_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142110656)))]; tensor linear_9_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_392_to_fp16, x = var_377_cast_fp16)[name = string("linear_9_cast_fp16")]; tensor var_396_to_fp16 = const()[name = string("op_396_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144207872)))]; tensor var_397_to_fp16 = const()[name = string("op_397_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(146305088)))]; tensor linear_10_cast_fp16 = linear(bias = var_397_to_fp16, weight = var_396_to_fp16, x = var_377_cast_fp16)[name = string("linear_10_cast_fp16")]; tensor var_399_shape_cast_fp16 = shape(x = linear_8_cast_fp16)[name = string("op_399_shape_cast_fp16")]; int32 gather_14_axis_0 = const()[name = string("gather_14_axis_0"), val = int32(0)]; int32 gather_14_batch_dims_0 = const()[name = string("gather_14_batch_dims_0"), val = int32(0)]; bool gather_14_validate_indices_0 = const()[name = string("gather_14_validate_indices_0"), val = bool(false)]; string var_399_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_399_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_14_to_uint16 = const()[name = string("select_14_to_uint16"), val = uint16(1)]; tensor var_399_shape_cast_fp16_to_uint16 = cast(dtype = var_399_shape_cast_fp16_to_uint16_dtype_0, x = var_399_shape_cast_fp16)[name = string("cast_292")]; uint16 gather_14_cast_uint16 = gather(axis = gather_14_axis_0, batch_dims = gather_14_batch_dims_0, indices = select_14_to_uint16, validate_indices = gather_14_validate_indices_0, x = var_399_shape_cast_fp16_to_uint16)[name = string("gather_14_cast_uint16")]; string gather_14_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_14_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_14_cast_uint16_to_int32 = cast(dtype = gather_14_cast_uint16_to_int32_dtype_0, x = gather_14_cast_uint16)[name = string("cast_291")]; int32 end_step_5 = add(x = offset, y = gather_14_cast_uint16_to_int32)[name = string("end_step_5")]; tensor expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor([0])]; tensor expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor([0])]; tensor expand_dims_19_axes_0 = const()[name = string("expand_dims_19_axes_0"), val = tensor([0])]; tensor expand_dims_19 = expand_dims(axes = expand_dims_19_axes_0, x = end_step_5)[name = string("expand_dims_19")]; tensor concat_26_values0_0 = const()[name = string("concat_26_values0_0"), val = tensor([1])]; int32 concat_26_axis_0 = const()[name = string("concat_26_axis_0"), val = int32(0)]; bool concat_26_interleave_0 = const()[name = string("concat_26_interleave_0"), val = bool(false)]; tensor concat_26 = concat(axis = concat_26_axis_0, interleave = concat_26_interleave_0, values = (concat_26_values0_0, expand_dims_16, expand_dims_1, expand_dims_18))[name = string("concat_26")]; tensor concat_27_values0_0 = const()[name = string("concat_27_values0_0"), val = tensor([0])]; tensor concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor([0])]; tensor concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor([0])]; int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)]; bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)]; tensor concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (concat_27_values0_0, concat_27_values1_0, expand_dims_19, concat_27_values3_0))[name = string("concat_27")]; tensor k_cache1_internal_tensor_assign_2_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_2_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_2_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_26, begin_mask = k_cache1_internal_tensor_assign_2_begin_mask_0, end = concat_27, end_mask = k_cache1_internal_tensor_assign_2_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_2_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_2_stride_0, update = linear_9_cast_fp16, x = coreml_update_state_48)[name = string("k_cache1_internal_tensor_assign_2_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_2_cast_fp16, input = k_cache1)[name = string("coreml_update_state_50_write_state")]; tensor coreml_update_state_50 = read_state(input = k_cache1)[name = string("coreml_update_state_50")]; tensor v_cache1_internal_tensor_assign_2_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_2_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_2_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_26, begin_mask = v_cache1_internal_tensor_assign_2_begin_mask_0, end = concat_27, end_mask = v_cache1_internal_tensor_assign_2_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_2_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_2_stride_0, update = linear_10_cast_fp16, x = coreml_update_state_49)[name = string("v_cache1_internal_tensor_assign_2_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_2_cast_fp16, input = v_cache1)[name = string("coreml_update_state_51_write_state")]; tensor coreml_update_state_51 = read_state(input = v_cache1)[name = string("coreml_update_state_51")]; int32 concat_32_values0_0 = const()[name = string("concat_32_values0_0"), val = int32(1)]; int32 concat_32_values2_0 = const()[name = string("concat_32_values2_0"), val = int32(1024)]; int32 concat_32_axis_0 = const()[name = string("concat_32_axis_0"), val = int32(0)]; bool concat_32_interleave_0 = const()[name = string("concat_32_interleave_0"), val = bool(false)]; tensor concat_32 = concat(axis = concat_32_axis_0, interleave = concat_32_interleave_0, values = (concat_32_values0_0, end_step_5, concat_32_values2_0))[name = string("concat_32")]; tensor var_415_begin_0 = const()[name = string("op_415_begin_0"), val = tensor([0, 0, 0])]; tensor var_415_end_mask_0 = const()[name = string("op_415_end_mask_0"), val = tensor([true, false, true])]; tensor var_415_cast_fp16 = slice_by_index(begin = var_415_begin_0, end = concat_32, end_mask = var_415_end_mask_0, x = k_cache_5_cast_fp16)[name = string("op_415_cast_fp16")]; tensor var_418_begin_0 = const()[name = string("op_418_begin_0"), val = tensor([0, 0, 0])]; tensor var_418_end_mask_0 = const()[name = string("op_418_end_mask_0"), val = tensor([true, false, true])]; tensor var_418_cast_fp16 = slice_by_index(begin = var_418_begin_0, end = concat_32, end_mask = var_418_end_mask_0, x = v_cache_5_cast_fp16)[name = string("op_418_cast_fp16")]; tensor concat_34x = const()[name = string("concat_34x"), val = tensor([1, -1, 16, 64])]; tensor var_428_cast_fp16 = reshape(shape = concat_34x, x = linear_8_cast_fp16)[name = string("op_428_cast_fp16")]; tensor const_124_to_fp16 = const()[name = string("const_124_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_11_cast_fp16 = mul(x = var_428_cast_fp16, y = const_124_to_fp16)[name = string("q_11_cast_fp16")]; tensor concat_35x = const()[name = string("concat_35x"), val = tensor([1, -1, 16, 64])]; tensor var_435_cast_fp16 = reshape(shape = concat_35x, x = var_415_cast_fp16)[name = string("op_435_cast_fp16")]; tensor const_125_to_fp16 = const()[name = string("const_125_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_15_cast_fp16 = mul(x = var_435_cast_fp16, y = const_125_to_fp16)[name = string("k_15_cast_fp16")]; tensor concat_36x = const()[name = string("concat_36x"), val = tensor([1, -1, 16, 64])]; tensor var_442_cast_fp16 = reshape(shape = concat_36x, x = var_418_cast_fp16)[name = string("op_442_cast_fp16")]; tensor var_443 = const()[name = string("op_443"), val = tensor([0, 2, 1, 3])]; bool qk_7_transpose_x_0 = const()[name = string("qk_7_transpose_x_0"), val = bool(false)]; bool qk_7_transpose_y_0 = const()[name = string("qk_7_transpose_y_0"), val = bool(false)]; tensor transpose_197_perm_0 = const()[name = string("transpose_197_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_198_perm_0 = const()[name = string("transpose_198_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_198 = transpose(perm = transpose_198_perm_0, x = k_15_cast_fp16)[name = string("transpose_470")]; tensor transpose_197 = transpose(perm = transpose_197_perm_0, x = q_11_cast_fp16)[name = string("transpose_471")]; tensor qk_7_cast_fp16 = matmul(transpose_x = qk_7_transpose_x_0, transpose_y = qk_7_transpose_y_0, x = transpose_197, y = transpose_198)[name = string("qk_7_cast_fp16")]; int32 concat_37_values1_0 = const()[name = string("concat_37_values1_0"), val = int32(448)]; int32 concat_37_axis_0 = const()[name = string("concat_37_axis_0"), val = int32(0)]; bool concat_37_interleave_0 = const()[name = string("concat_37_interleave_0"), val = bool(false)]; tensor concat_37 = concat(axis = concat_37_axis_0, interleave = concat_37_interleave_0, values = (gather_14_cast_uint16_to_int32, concat_37_values1_0))[name = string("concat_37")]; tensor var_446_begin_0 = const()[name = string("op_446_begin_0"), val = tensor([0, 0])]; tensor var_446_end_mask_0 = const()[name = string("op_446_end_mask_0"), val = tensor([false, true])]; tensor var_446_cast_fp16 = slice_by_index(begin = var_446_begin_0, end = concat_37, end_mask = var_446_end_mask_0, x = mask_to_fp16)[name = string("op_446_cast_fp16")]; int32 concat_38_values0_0 = const()[name = string("concat_38_values0_0"), val = int32(0)]; int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)]; bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)]; tensor concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (concat_38_values0_0, gather_14_cast_uint16_to_int32))[name = string("concat_38")]; tensor var_447_begin_0 = const()[name = string("op_447_begin_0"), val = tensor([0, 0])]; tensor var_447_end_mask_0 = const()[name = string("op_447_end_mask_0"), val = tensor([true, false])]; tensor var_447_cast_fp16 = slice_by_index(begin = var_447_begin_0, end = concat_38, end_mask = var_447_end_mask_0, x = var_446_cast_fp16)[name = string("op_447_cast_fp16")]; tensor qk_9_cast_fp16 = add(x = qk_7_cast_fp16, y = var_447_cast_fp16)[name = string("qk_9_cast_fp16")]; tensor var_450_cast_fp16 = softmax(axis = var_359, x = qk_9_cast_fp16)[name = string("op_450_cast_fp16")]; bool var_452_transpose_x_0 = const()[name = string("op_452_transpose_x_0"), val = bool(false)]; bool var_452_transpose_y_0 = const()[name = string("op_452_transpose_y_0"), val = bool(false)]; tensor v_15_cast_fp16 = transpose(perm = var_443, x = var_442_cast_fp16)[name = string("transpose_472")]; tensor var_452_cast_fp16 = matmul(transpose_x = var_452_transpose_x_0, transpose_y = var_452_transpose_y_0, x = var_450_cast_fp16, y = v_15_cast_fp16)[name = string("op_452_cast_fp16")]; tensor var_453 = const()[name = string("op_453"), val = tensor([0, 2, 1, 3])]; tensor concat_39x = const()[name = string("concat_39x"), val = tensor([1, -1, 1024])]; tensor var_454_cast_fp16 = transpose(perm = var_453, x = var_452_cast_fp16)[name = string("transpose_469")]; tensor x_25_cast_fp16 = reshape(shape = concat_39x, x = var_454_cast_fp16)[name = string("x_25_cast_fp16")]; tensor var_458_to_fp16 = const()[name = string("op_458_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(146307200)))]; tensor var_459_to_fp16 = const()[name = string("op_459_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148404416)))]; tensor linear_11_cast_fp16 = linear(bias = var_459_to_fp16, weight = var_458_to_fp16, x = x_25_cast_fp16)[name = string("linear_11_cast_fp16")]; tensor x_27_cast_fp16 = add(x = x_21_cast_fp16, y = linear_11_cast_fp16)[name = string("x_27_cast_fp16")]; tensor var_466_axes_0 = const()[name = string("op_466_axes_0"), val = tensor([-1])]; tensor blocks_1_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_1_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148406528)))]; tensor blocks_1_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_1_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148408640)))]; tensor var_466_cast_fp16 = layer_norm(axes = var_466_axes_0, beta = blocks_1_cross_attn_ln_bias_to_fp16, epsilon = var_365_to_fp16, gamma = blocks_1_cross_attn_ln_weight_to_fp16, x = x_27_cast_fp16)[name = string("op_466_cast_fp16")]; tensor var_475_to_fp16 = const()[name = string("op_475_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148410752)))]; tensor var_476_to_fp16 = const()[name = string("op_476_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(150507968)))]; tensor linear_12_cast_fp16 = linear(bias = var_476_to_fp16, weight = var_475_to_fp16, x = var_466_cast_fp16)[name = string("linear_12_cast_fp16")]; tensor concat_40 = const()[name = string("concat_40"), val = tensor([0, 0, 0])]; tensor concat_41 = const()[name = string("concat_41"), val = tensor([0, 1500, 0])]; tensor k_17_internal_tensor_assign_1_stride_0 = const()[name = string("k_17_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_17_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_17_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_17_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_17_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_40, begin_mask = k_17_internal_tensor_assign_1_begin_mask_0, end = concat_41, end_mask = k_17_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_17_internal_tensor_assign_1_squeeze_mask_0, stride = k_17_internal_tensor_assign_1_stride_0, update = k_cache_7_cast_fp16, x = k_7_to_fp16)[name = string("k_17_internal_tensor_assign_1_cast_fp16")]; tensor concat_42 = const()[name = string("concat_42"), val = tensor([0, 0, 0])]; tensor concat_43 = const()[name = string("concat_43"), val = tensor([0, 1500, 0])]; tensor v_17_internal_tensor_assign_1_stride_0 = const()[name = string("v_17_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_17_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_17_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_17_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_17_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_42, begin_mask = v_17_internal_tensor_assign_1_begin_mask_0, end = concat_43, end_mask = v_17_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_17_internal_tensor_assign_1_squeeze_mask_0, stride = v_17_internal_tensor_assign_1_stride_0, update = v_cache_7_cast_fp16, x = k_7_to_fp16)[name = string("v_17_internal_tensor_assign_1_cast_fp16")]; tensor concat_44x = const()[name = string("concat_44x"), val = tensor([1, -1, 16, 64])]; tensor var_496_cast_fp16 = reshape(shape = concat_44x, x = linear_12_cast_fp16)[name = string("op_496_cast_fp16")]; tensor const_126_to_fp16 = const()[name = string("const_126_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_15_cast_fp16 = mul(x = var_496_cast_fp16, y = const_126_to_fp16)[name = string("q_15_cast_fp16")]; tensor var_502 = const()[name = string("op_502"), val = tensor([1, 1500, 16, -1])]; tensor var_503_cast_fp16 = reshape(shape = var_502, x = k_17_internal_tensor_assign_1_cast_fp16)[name = string("op_503_cast_fp16")]; tensor const_127_to_fp16 = const()[name = string("const_127_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_19_cast_fp16 = mul(x = var_503_cast_fp16, y = const_127_to_fp16)[name = string("k_19_cast_fp16")]; tensor var_509 = const()[name = string("op_509"), val = tensor([1, 1500, 16, -1])]; tensor var_510_cast_fp16 = reshape(shape = var_509, x = v_17_internal_tensor_assign_1_cast_fp16)[name = string("op_510_cast_fp16")]; tensor var_511 = const()[name = string("op_511"), val = tensor([0, 2, 1, 3])]; bool qk_11_transpose_x_0 = const()[name = string("qk_11_transpose_x_0"), val = bool(false)]; bool qk_11_transpose_y_0 = const()[name = string("qk_11_transpose_y_0"), val = bool(false)]; tensor transpose_199_perm_0 = const()[name = string("transpose_199_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_200_perm_0 = const()[name = string("transpose_200_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_200 = transpose(perm = transpose_200_perm_0, x = k_19_cast_fp16)[name = string("transpose_466")]; tensor transpose_199 = transpose(perm = transpose_199_perm_0, x = q_15_cast_fp16)[name = string("transpose_467")]; tensor qk_11_cast_fp16 = matmul(transpose_x = qk_11_transpose_x_0, transpose_y = qk_11_transpose_y_0, x = transpose_199, y = transpose_200)[name = string("qk_11_cast_fp16")]; tensor var_515_cast_fp16 = softmax(axis = var_359, x = qk_11_cast_fp16)[name = string("op_515_cast_fp16")]; bool var_517_transpose_x_0 = const()[name = string("op_517_transpose_x_0"), val = bool(false)]; bool var_517_transpose_y_0 = const()[name = string("op_517_transpose_y_0"), val = bool(false)]; tensor v_19_cast_fp16 = transpose(perm = var_511, x = var_510_cast_fp16)[name = string("transpose_468")]; tensor var_517_cast_fp16 = matmul(transpose_x = var_517_transpose_x_0, transpose_y = var_517_transpose_y_0, x = var_515_cast_fp16, y = v_19_cast_fp16)[name = string("op_517_cast_fp16")]; tensor var_518 = const()[name = string("op_518"), val = tensor([0, 2, 1, 3])]; tensor concat_45x = const()[name = string("concat_45x"), val = tensor([1, -1, 1024])]; tensor var_519_cast_fp16 = transpose(perm = var_518, x = var_517_cast_fp16)[name = string("transpose_465")]; tensor x_31_cast_fp16 = reshape(shape = concat_45x, x = var_519_cast_fp16)[name = string("x_31_cast_fp16")]; tensor var_523_to_fp16 = const()[name = string("op_523_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(150510080)))]; tensor var_524_to_fp16 = const()[name = string("op_524_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152607296)))]; tensor linear_13_cast_fp16 = linear(bias = var_524_to_fp16, weight = var_523_to_fp16, x = x_31_cast_fp16)[name = string("linear_13_cast_fp16")]; tensor x_33_cast_fp16 = add(x = x_27_cast_fp16, y = linear_13_cast_fp16)[name = string("x_33_cast_fp16")]; tensor var_531_axes_0 = const()[name = string("op_531_axes_0"), val = tensor([-1])]; tensor blocks_1_mlp_ln_weight_to_fp16 = const()[name = string("blocks_1_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152609408)))]; tensor blocks_1_mlp_ln_bias_to_fp16 = const()[name = string("blocks_1_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152611520)))]; tensor var_531_cast_fp16 = layer_norm(axes = var_531_axes_0, beta = blocks_1_mlp_ln_bias_to_fp16, epsilon = var_365_to_fp16, gamma = blocks_1_mlp_ln_weight_to_fp16, x = x_33_cast_fp16)[name = string("op_531_cast_fp16")]; tensor var_540_to_fp16 = const()[name = string("op_540_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152613632)))]; tensor var_541_to_fp16 = const()[name = string("op_541_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161002304)))]; tensor linear_14_cast_fp16 = linear(bias = var_541_to_fp16, weight = var_540_to_fp16, x = var_531_cast_fp16)[name = string("linear_14_cast_fp16")]; string x_37_mode_0 = const()[name = string("x_37_mode_0"), val = string("EXACT")]; tensor x_37_cast_fp16 = gelu(mode = x_37_mode_0, x = linear_14_cast_fp16)[name = string("x_37_cast_fp16")]; tensor var_546_to_fp16 = const()[name = string("op_546_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161010560)))]; tensor var_547_to_fp16 = const()[name = string("op_547_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169399232)))]; tensor linear_15_cast_fp16 = linear(bias = var_547_to_fp16, weight = var_546_to_fp16, x = x_37_cast_fp16)[name = string("linear_15_cast_fp16")]; tensor x_39_cast_fp16 = add(x = x_33_cast_fp16, y = linear_15_cast_fp16)[name = string("x_39_cast_fp16")]; tensor k_cache_9_begin_0 = const()[name = string("k_cache_9_begin_0"), val = tensor([2, 0, 0, 0])]; tensor k_cache_9_end_0 = const()[name = string("k_cache_9_end_0"), val = tensor([3, 1, 448, 1024])]; tensor k_cache_9_end_mask_0 = const()[name = string("k_cache_9_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_9_squeeze_mask_0 = const()[name = string("k_cache_9_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_9_cast_fp16 = slice_by_index(begin = k_cache_9_begin_0, end = k_cache_9_end_0, end_mask = k_cache_9_end_mask_0, squeeze_mask = k_cache_9_squeeze_mask_0, x = coreml_update_state_50)[name = string("k_cache_9_cast_fp16")]; tensor v_cache_9_begin_0 = const()[name = string("v_cache_9_begin_0"), val = tensor([2, 0, 0, 0])]; tensor v_cache_9_end_0 = const()[name = string("v_cache_9_end_0"), val = tensor([3, 1, 448, 1024])]; tensor v_cache_9_end_mask_0 = const()[name = string("v_cache_9_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_9_squeeze_mask_0 = const()[name = string("v_cache_9_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_9_cast_fp16 = slice_by_index(begin = v_cache_9_begin_0, end = v_cache_9_end_0, end_mask = v_cache_9_end_mask_0, squeeze_mask = v_cache_9_squeeze_mask_0, x = coreml_update_state_51)[name = string("v_cache_9_cast_fp16")]; tensor k_cache_11_begin_0 = const()[name = string("k_cache_11_begin_0"), val = tensor([2, 0, 0, 0])]; tensor k_cache_11_end_0 = const()[name = string("k_cache_11_end_0"), val = tensor([3, 1, 1500, 1024])]; tensor k_cache_11_end_mask_0 = const()[name = string("k_cache_11_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_11_squeeze_mask_0 = const()[name = string("k_cache_11_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_11_cast_fp16 = slice_by_index(begin = k_cache_11_begin_0, end = k_cache_11_end_0, end_mask = k_cache_11_end_mask_0, squeeze_mask = k_cache_11_squeeze_mask_0, x = read_state_2)[name = string("k_cache_11_cast_fp16")]; tensor v_cache_11_begin_0 = const()[name = string("v_cache_11_begin_0"), val = tensor([2, 0, 0, 0])]; tensor v_cache_11_end_0 = const()[name = string("v_cache_11_end_0"), val = tensor([3, 1, 1500, 1024])]; tensor v_cache_11_end_mask_0 = const()[name = string("v_cache_11_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_11_squeeze_mask_0 = const()[name = string("v_cache_11_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_11_cast_fp16 = slice_by_index(begin = v_cache_11_begin_0, end = v_cache_11_end_0, end_mask = v_cache_11_end_mask_0, squeeze_mask = v_cache_11_squeeze_mask_0, x = read_state_3)[name = string("v_cache_11_cast_fp16")]; int32 var_570 = const()[name = string("op_570"), val = int32(-1)]; tensor var_588_axes_0 = const()[name = string("op_588_axes_0"), val = tensor([-1])]; tensor blocks_2_attn_ln_weight_to_fp16 = const()[name = string("blocks_2_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169401344)))]; tensor blocks_2_attn_ln_bias_to_fp16 = const()[name = string("blocks_2_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169403456)))]; fp16 var_576_to_fp16 = const()[name = string("op_576_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_588_cast_fp16 = layer_norm(axes = var_588_axes_0, beta = blocks_2_attn_ln_bias_to_fp16, epsilon = var_576_to_fp16, gamma = blocks_2_attn_ln_weight_to_fp16, x = x_39_cast_fp16)[name = string("op_588_cast_fp16")]; tensor var_599_to_fp16 = const()[name = string("op_599_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169405568)))]; tensor var_600_to_fp16 = const()[name = string("op_600_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171502784)))]; tensor linear_16_cast_fp16 = linear(bias = var_600_to_fp16, weight = var_599_to_fp16, x = var_588_cast_fp16)[name = string("linear_16_cast_fp16")]; tensor var_603_to_fp16 = const()[name = string("op_603_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171504896)))]; tensor linear_17_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_603_to_fp16, x = var_588_cast_fp16)[name = string("linear_17_cast_fp16")]; tensor var_607_to_fp16 = const()[name = string("op_607_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173602112)))]; tensor var_608_to_fp16 = const()[name = string("op_608_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(175699328)))]; tensor linear_18_cast_fp16 = linear(bias = var_608_to_fp16, weight = var_607_to_fp16, x = var_588_cast_fp16)[name = string("linear_18_cast_fp16")]; tensor var_610_shape_cast_fp16 = shape(x = linear_16_cast_fp16)[name = string("op_610_shape_cast_fp16")]; int32 gather_26_axis_0 = const()[name = string("gather_26_axis_0"), val = int32(0)]; int32 gather_26_batch_dims_0 = const()[name = string("gather_26_batch_dims_0"), val = int32(0)]; bool gather_26_validate_indices_0 = const()[name = string("gather_26_validate_indices_0"), val = bool(false)]; string var_610_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_610_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_26_to_uint16 = const()[name = string("select_26_to_uint16"), val = uint16(1)]; tensor var_610_shape_cast_fp16_to_uint16 = cast(dtype = var_610_shape_cast_fp16_to_uint16_dtype_0, x = var_610_shape_cast_fp16)[name = string("cast_290")]; uint16 gather_26_cast_uint16 = gather(axis = gather_26_axis_0, batch_dims = gather_26_batch_dims_0, indices = select_26_to_uint16, validate_indices = gather_26_validate_indices_0, x = var_610_shape_cast_fp16_to_uint16)[name = string("gather_26_cast_uint16")]; string gather_26_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_26_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_26_cast_uint16_to_int32 = cast(dtype = gather_26_cast_uint16_to_int32_dtype_0, x = gather_26_cast_uint16)[name = string("cast_289")]; int32 end_step_7 = add(x = offset, y = gather_26_cast_uint16_to_int32)[name = string("end_step_7")]; tensor expand_dims_32 = const()[name = string("expand_dims_32"), val = tensor([0])]; tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([0])]; tensor expand_dims_35_axes_0 = const()[name = string("expand_dims_35_axes_0"), val = tensor([0])]; tensor expand_dims_35 = expand_dims(axes = expand_dims_35_axes_0, x = end_step_7)[name = string("expand_dims_35")]; tensor concat_48_values0_0 = const()[name = string("concat_48_values0_0"), val = tensor([2])]; int32 concat_48_axis_0 = const()[name = string("concat_48_axis_0"), val = int32(0)]; bool concat_48_interleave_0 = const()[name = string("concat_48_interleave_0"), val = bool(false)]; tensor concat_48 = concat(axis = concat_48_axis_0, interleave = concat_48_interleave_0, values = (concat_48_values0_0, expand_dims_32, expand_dims_1, expand_dims_34))[name = string("concat_48")]; tensor concat_49_values0_0 = const()[name = string("concat_49_values0_0"), val = tensor([0])]; tensor concat_49_values1_0 = const()[name = string("concat_49_values1_0"), val = tensor([0])]; tensor concat_49_values3_0 = const()[name = string("concat_49_values3_0"), val = tensor([0])]; int32 concat_49_axis_0 = const()[name = string("concat_49_axis_0"), val = int32(0)]; bool concat_49_interleave_0 = const()[name = string("concat_49_interleave_0"), val = bool(false)]; tensor concat_49 = concat(axis = concat_49_axis_0, interleave = concat_49_interleave_0, values = (concat_49_values0_0, concat_49_values1_0, expand_dims_35, concat_49_values3_0))[name = string("concat_49")]; tensor k_cache1_internal_tensor_assign_3_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_3_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_3_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_48, begin_mask = k_cache1_internal_tensor_assign_3_begin_mask_0, end = concat_49, end_mask = k_cache1_internal_tensor_assign_3_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_3_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_3_stride_0, update = linear_17_cast_fp16, x = coreml_update_state_50)[name = string("k_cache1_internal_tensor_assign_3_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_3_cast_fp16, input = k_cache1)[name = string("coreml_update_state_52_write_state")]; tensor coreml_update_state_52 = read_state(input = k_cache1)[name = string("coreml_update_state_52")]; tensor v_cache1_internal_tensor_assign_3_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_3_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_3_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_48, begin_mask = v_cache1_internal_tensor_assign_3_begin_mask_0, end = concat_49, end_mask = v_cache1_internal_tensor_assign_3_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_3_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_3_stride_0, update = linear_18_cast_fp16, x = coreml_update_state_51)[name = string("v_cache1_internal_tensor_assign_3_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_3_cast_fp16, input = v_cache1)[name = string("coreml_update_state_53_write_state")]; tensor coreml_update_state_53 = read_state(input = v_cache1)[name = string("coreml_update_state_53")]; int32 concat_54_values0_0 = const()[name = string("concat_54_values0_0"), val = int32(1)]; int32 concat_54_values2_0 = const()[name = string("concat_54_values2_0"), val = int32(1024)]; int32 concat_54_axis_0 = const()[name = string("concat_54_axis_0"), val = int32(0)]; bool concat_54_interleave_0 = const()[name = string("concat_54_interleave_0"), val = bool(false)]; tensor concat_54 = concat(axis = concat_54_axis_0, interleave = concat_54_interleave_0, values = (concat_54_values0_0, end_step_7, concat_54_values2_0))[name = string("concat_54")]; tensor var_626_begin_0 = const()[name = string("op_626_begin_0"), val = tensor([0, 0, 0])]; tensor var_626_end_mask_0 = const()[name = string("op_626_end_mask_0"), val = tensor([true, false, true])]; tensor var_626_cast_fp16 = slice_by_index(begin = var_626_begin_0, end = concat_54, end_mask = var_626_end_mask_0, x = k_cache_9_cast_fp16)[name = string("op_626_cast_fp16")]; tensor var_629_begin_0 = const()[name = string("op_629_begin_0"), val = tensor([0, 0, 0])]; tensor var_629_end_mask_0 = const()[name = string("op_629_end_mask_0"), val = tensor([true, false, true])]; tensor var_629_cast_fp16 = slice_by_index(begin = var_629_begin_0, end = concat_54, end_mask = var_629_end_mask_0, x = v_cache_9_cast_fp16)[name = string("op_629_cast_fp16")]; tensor concat_56x = const()[name = string("concat_56x"), val = tensor([1, -1, 16, 64])]; tensor var_639_cast_fp16 = reshape(shape = concat_56x, x = linear_16_cast_fp16)[name = string("op_639_cast_fp16")]; tensor const_128_to_fp16 = const()[name = string("const_128_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_19_cast_fp16 = mul(x = var_639_cast_fp16, y = const_128_to_fp16)[name = string("q_19_cast_fp16")]; tensor concat_57x = const()[name = string("concat_57x"), val = tensor([1, -1, 16, 64])]; tensor var_646_cast_fp16 = reshape(shape = concat_57x, x = var_626_cast_fp16)[name = string("op_646_cast_fp16")]; tensor const_129_to_fp16 = const()[name = string("const_129_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_25_cast_fp16 = mul(x = var_646_cast_fp16, y = const_129_to_fp16)[name = string("k_25_cast_fp16")]; tensor concat_58x = const()[name = string("concat_58x"), val = tensor([1, -1, 16, 64])]; tensor var_653_cast_fp16 = reshape(shape = concat_58x, x = var_629_cast_fp16)[name = string("op_653_cast_fp16")]; tensor var_654 = const()[name = string("op_654"), val = tensor([0, 2, 1, 3])]; bool qk_13_transpose_x_0 = const()[name = string("qk_13_transpose_x_0"), val = bool(false)]; bool qk_13_transpose_y_0 = const()[name = string("qk_13_transpose_y_0"), val = bool(false)]; tensor transpose_201_perm_0 = const()[name = string("transpose_201_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_202_perm_0 = const()[name = string("transpose_202_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_202 = transpose(perm = transpose_202_perm_0, x = k_25_cast_fp16)[name = string("transpose_462")]; tensor transpose_201 = transpose(perm = transpose_201_perm_0, x = q_19_cast_fp16)[name = string("transpose_463")]; tensor qk_13_cast_fp16 = matmul(transpose_x = qk_13_transpose_x_0, transpose_y = qk_13_transpose_y_0, x = transpose_201, y = transpose_202)[name = string("qk_13_cast_fp16")]; int32 concat_59_values1_0 = const()[name = string("concat_59_values1_0"), val = int32(448)]; int32 concat_59_axis_0 = const()[name = string("concat_59_axis_0"), val = int32(0)]; bool concat_59_interleave_0 = const()[name = string("concat_59_interleave_0"), val = bool(false)]; tensor concat_59 = concat(axis = concat_59_axis_0, interleave = concat_59_interleave_0, values = (gather_26_cast_uint16_to_int32, concat_59_values1_0))[name = string("concat_59")]; tensor var_657_begin_0 = const()[name = string("op_657_begin_0"), val = tensor([0, 0])]; tensor var_657_end_mask_0 = const()[name = string("op_657_end_mask_0"), val = tensor([false, true])]; tensor var_657_cast_fp16 = slice_by_index(begin = var_657_begin_0, end = concat_59, end_mask = var_657_end_mask_0, x = mask_to_fp16)[name = string("op_657_cast_fp16")]; int32 concat_60_values0_0 = const()[name = string("concat_60_values0_0"), val = int32(0)]; int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)]; bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)]; tensor concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (concat_60_values0_0, gather_26_cast_uint16_to_int32))[name = string("concat_60")]; tensor var_658_begin_0 = const()[name = string("op_658_begin_0"), val = tensor([0, 0])]; tensor var_658_end_mask_0 = const()[name = string("op_658_end_mask_0"), val = tensor([true, false])]; tensor var_658_cast_fp16 = slice_by_index(begin = var_658_begin_0, end = concat_60, end_mask = var_658_end_mask_0, x = var_657_cast_fp16)[name = string("op_658_cast_fp16")]; tensor qk_15_cast_fp16 = add(x = qk_13_cast_fp16, y = var_658_cast_fp16)[name = string("qk_15_cast_fp16")]; tensor var_661_cast_fp16 = softmax(axis = var_570, x = qk_15_cast_fp16)[name = string("op_661_cast_fp16")]; bool var_663_transpose_x_0 = const()[name = string("op_663_transpose_x_0"), val = bool(false)]; bool var_663_transpose_y_0 = const()[name = string("op_663_transpose_y_0"), val = bool(false)]; tensor v_25_cast_fp16 = transpose(perm = var_654, x = var_653_cast_fp16)[name = string("transpose_464")]; tensor var_663_cast_fp16 = matmul(transpose_x = var_663_transpose_x_0, transpose_y = var_663_transpose_y_0, x = var_661_cast_fp16, y = v_25_cast_fp16)[name = string("op_663_cast_fp16")]; tensor var_664 = const()[name = string("op_664"), val = tensor([0, 2, 1, 3])]; tensor concat_61x = const()[name = string("concat_61x"), val = tensor([1, -1, 1024])]; tensor var_665_cast_fp16 = transpose(perm = var_664, x = var_663_cast_fp16)[name = string("transpose_461")]; tensor x_43_cast_fp16 = reshape(shape = concat_61x, x = var_665_cast_fp16)[name = string("x_43_cast_fp16")]; tensor var_669_to_fp16 = const()[name = string("op_669_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(175701440)))]; tensor var_670_to_fp16 = const()[name = string("op_670_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177798656)))]; tensor linear_19_cast_fp16 = linear(bias = var_670_to_fp16, weight = var_669_to_fp16, x = x_43_cast_fp16)[name = string("linear_19_cast_fp16")]; tensor x_45_cast_fp16 = add(x = x_39_cast_fp16, y = linear_19_cast_fp16)[name = string("x_45_cast_fp16")]; tensor var_677_axes_0 = const()[name = string("op_677_axes_0"), val = tensor([-1])]; tensor blocks_2_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_2_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177800768)))]; tensor blocks_2_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_2_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177802880)))]; tensor var_677_cast_fp16 = layer_norm(axes = var_677_axes_0, beta = blocks_2_cross_attn_ln_bias_to_fp16, epsilon = var_576_to_fp16, gamma = blocks_2_cross_attn_ln_weight_to_fp16, x = x_45_cast_fp16)[name = string("op_677_cast_fp16")]; tensor var_686_to_fp16 = const()[name = string("op_686_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177804992)))]; tensor var_687_to_fp16 = const()[name = string("op_687_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179902208)))]; tensor linear_20_cast_fp16 = linear(bias = var_687_to_fp16, weight = var_686_to_fp16, x = var_677_cast_fp16)[name = string("linear_20_cast_fp16")]; tensor concat_62 = const()[name = string("concat_62"), val = tensor([0, 0, 0])]; tensor concat_63 = const()[name = string("concat_63"), val = tensor([0, 1500, 0])]; tensor k_27_internal_tensor_assign_1_stride_0 = const()[name = string("k_27_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_27_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_27_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_27_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_27_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_62, begin_mask = k_27_internal_tensor_assign_1_begin_mask_0, end = concat_63, end_mask = k_27_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_27_internal_tensor_assign_1_squeeze_mask_0, stride = k_27_internal_tensor_assign_1_stride_0, update = k_cache_11_cast_fp16, x = k_7_to_fp16)[name = string("k_27_internal_tensor_assign_1_cast_fp16")]; tensor concat_64 = const()[name = string("concat_64"), val = tensor([0, 0, 0])]; tensor concat_65 = const()[name = string("concat_65"), val = tensor([0, 1500, 0])]; tensor v_27_internal_tensor_assign_1_stride_0 = const()[name = string("v_27_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_27_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_27_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_27_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_27_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_64, begin_mask = v_27_internal_tensor_assign_1_begin_mask_0, end = concat_65, end_mask = v_27_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_27_internal_tensor_assign_1_squeeze_mask_0, stride = v_27_internal_tensor_assign_1_stride_0, update = v_cache_11_cast_fp16, x = k_7_to_fp16)[name = string("v_27_internal_tensor_assign_1_cast_fp16")]; tensor concat_66x = const()[name = string("concat_66x"), val = tensor([1, -1, 16, 64])]; tensor var_707_cast_fp16 = reshape(shape = concat_66x, x = linear_20_cast_fp16)[name = string("op_707_cast_fp16")]; tensor const_130_to_fp16 = const()[name = string("const_130_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_23_cast_fp16 = mul(x = var_707_cast_fp16, y = const_130_to_fp16)[name = string("q_23_cast_fp16")]; tensor var_713 = const()[name = string("op_713"), val = tensor([1, 1500, 16, -1])]; tensor var_714_cast_fp16 = reshape(shape = var_713, x = k_27_internal_tensor_assign_1_cast_fp16)[name = string("op_714_cast_fp16")]; tensor const_131_to_fp16 = const()[name = string("const_131_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_29_cast_fp16 = mul(x = var_714_cast_fp16, y = const_131_to_fp16)[name = string("k_29_cast_fp16")]; tensor var_720 = const()[name = string("op_720"), val = tensor([1, 1500, 16, -1])]; tensor var_721_cast_fp16 = reshape(shape = var_720, x = v_27_internal_tensor_assign_1_cast_fp16)[name = string("op_721_cast_fp16")]; tensor var_722 = const()[name = string("op_722"), val = tensor([0, 2, 1, 3])]; bool qk_17_transpose_x_0 = const()[name = string("qk_17_transpose_x_0"), val = bool(false)]; bool qk_17_transpose_y_0 = const()[name = string("qk_17_transpose_y_0"), val = bool(false)]; tensor transpose_203_perm_0 = const()[name = string("transpose_203_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_204_perm_0 = const()[name = string("transpose_204_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_204 = transpose(perm = transpose_204_perm_0, x = k_29_cast_fp16)[name = string("transpose_458")]; tensor transpose_203 = transpose(perm = transpose_203_perm_0, x = q_23_cast_fp16)[name = string("transpose_459")]; tensor qk_17_cast_fp16 = matmul(transpose_x = qk_17_transpose_x_0, transpose_y = qk_17_transpose_y_0, x = transpose_203, y = transpose_204)[name = string("qk_17_cast_fp16")]; tensor var_726_cast_fp16 = softmax(axis = var_570, x = qk_17_cast_fp16)[name = string("op_726_cast_fp16")]; bool var_728_transpose_x_0 = const()[name = string("op_728_transpose_x_0"), val = bool(false)]; bool var_728_transpose_y_0 = const()[name = string("op_728_transpose_y_0"), val = bool(false)]; tensor v_29_cast_fp16 = transpose(perm = var_722, x = var_721_cast_fp16)[name = string("transpose_460")]; tensor var_728_cast_fp16 = matmul(transpose_x = var_728_transpose_x_0, transpose_y = var_728_transpose_y_0, x = var_726_cast_fp16, y = v_29_cast_fp16)[name = string("op_728_cast_fp16")]; tensor var_729 = const()[name = string("op_729"), val = tensor([0, 2, 1, 3])]; tensor concat_67x = const()[name = string("concat_67x"), val = tensor([1, -1, 1024])]; tensor var_730_cast_fp16 = transpose(perm = var_729, x = var_728_cast_fp16)[name = string("transpose_457")]; tensor x_49_cast_fp16 = reshape(shape = concat_67x, x = var_730_cast_fp16)[name = string("x_49_cast_fp16")]; tensor var_734_to_fp16 = const()[name = string("op_734_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179904320)))]; tensor var_735_to_fp16 = const()[name = string("op_735_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182001536)))]; tensor linear_21_cast_fp16 = linear(bias = var_735_to_fp16, weight = var_734_to_fp16, x = x_49_cast_fp16)[name = string("linear_21_cast_fp16")]; tensor x_51_cast_fp16 = add(x = x_45_cast_fp16, y = linear_21_cast_fp16)[name = string("x_51_cast_fp16")]; tensor var_742_axes_0 = const()[name = string("op_742_axes_0"), val = tensor([-1])]; tensor blocks_2_mlp_ln_weight_to_fp16 = const()[name = string("blocks_2_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182003648)))]; tensor blocks_2_mlp_ln_bias_to_fp16 = const()[name = string("blocks_2_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182005760)))]; tensor var_742_cast_fp16 = layer_norm(axes = var_742_axes_0, beta = blocks_2_mlp_ln_bias_to_fp16, epsilon = var_576_to_fp16, gamma = blocks_2_mlp_ln_weight_to_fp16, x = x_51_cast_fp16)[name = string("op_742_cast_fp16")]; tensor var_751_to_fp16 = const()[name = string("op_751_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182007872)))]; tensor var_752_to_fp16 = const()[name = string("op_752_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190396544)))]; tensor linear_22_cast_fp16 = linear(bias = var_752_to_fp16, weight = var_751_to_fp16, x = var_742_cast_fp16)[name = string("linear_22_cast_fp16")]; string x_55_mode_0 = const()[name = string("x_55_mode_0"), val = string("EXACT")]; tensor x_55_cast_fp16 = gelu(mode = x_55_mode_0, x = linear_22_cast_fp16)[name = string("x_55_cast_fp16")]; tensor var_757_to_fp16 = const()[name = string("op_757_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190404800)))]; tensor var_758_to_fp16 = const()[name = string("op_758_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198793472)))]; tensor linear_23_cast_fp16 = linear(bias = var_758_to_fp16, weight = var_757_to_fp16, x = x_55_cast_fp16)[name = string("linear_23_cast_fp16")]; tensor x_57_cast_fp16 = add(x = x_51_cast_fp16, y = linear_23_cast_fp16)[name = string("x_57_cast_fp16")]; tensor k_cache_13_begin_0 = const()[name = string("k_cache_13_begin_0"), val = tensor([3, 0, 0, 0])]; tensor k_cache_13_end_0 = const()[name = string("k_cache_13_end_0"), val = tensor([4, 1, 448, 1024])]; tensor k_cache_13_end_mask_0 = const()[name = string("k_cache_13_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_13_squeeze_mask_0 = const()[name = string("k_cache_13_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_13_cast_fp16 = slice_by_index(begin = k_cache_13_begin_0, end = k_cache_13_end_0, end_mask = k_cache_13_end_mask_0, squeeze_mask = k_cache_13_squeeze_mask_0, x = coreml_update_state_52)[name = string("k_cache_13_cast_fp16")]; tensor v_cache_13_begin_0 = const()[name = string("v_cache_13_begin_0"), val = tensor([3, 0, 0, 0])]; tensor v_cache_13_end_0 = const()[name = string("v_cache_13_end_0"), val = tensor([4, 1, 448, 1024])]; tensor v_cache_13_end_mask_0 = const()[name = string("v_cache_13_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_13_squeeze_mask_0 = const()[name = string("v_cache_13_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_13_cast_fp16 = slice_by_index(begin = v_cache_13_begin_0, end = v_cache_13_end_0, end_mask = v_cache_13_end_mask_0, squeeze_mask = v_cache_13_squeeze_mask_0, x = coreml_update_state_53)[name = string("v_cache_13_cast_fp16")]; tensor k_cache_15_begin_0 = const()[name = string("k_cache_15_begin_0"), val = tensor([3, 0, 0, 0])]; tensor k_cache_15_end_0 = const()[name = string("k_cache_15_end_0"), val = tensor([4, 1, 1500, 1024])]; tensor k_cache_15_end_mask_0 = const()[name = string("k_cache_15_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_15_squeeze_mask_0 = const()[name = string("k_cache_15_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_15_cast_fp16 = slice_by_index(begin = k_cache_15_begin_0, end = k_cache_15_end_0, end_mask = k_cache_15_end_mask_0, squeeze_mask = k_cache_15_squeeze_mask_0, x = read_state_2)[name = string("k_cache_15_cast_fp16")]; tensor v_cache_15_begin_0 = const()[name = string("v_cache_15_begin_0"), val = tensor([3, 0, 0, 0])]; tensor v_cache_15_end_0 = const()[name = string("v_cache_15_end_0"), val = tensor([4, 1, 1500, 1024])]; tensor v_cache_15_end_mask_0 = const()[name = string("v_cache_15_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_15_squeeze_mask_0 = const()[name = string("v_cache_15_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_15_cast_fp16 = slice_by_index(begin = v_cache_15_begin_0, end = v_cache_15_end_0, end_mask = v_cache_15_end_mask_0, squeeze_mask = v_cache_15_squeeze_mask_0, x = read_state_3)[name = string("v_cache_15_cast_fp16")]; int32 var_781 = const()[name = string("op_781"), val = int32(-1)]; tensor var_799_axes_0 = const()[name = string("op_799_axes_0"), val = tensor([-1])]; tensor blocks_3_attn_ln_weight_to_fp16 = const()[name = string("blocks_3_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198795584)))]; tensor blocks_3_attn_ln_bias_to_fp16 = const()[name = string("blocks_3_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198797696)))]; fp16 var_787_to_fp16 = const()[name = string("op_787_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_799_cast_fp16 = layer_norm(axes = var_799_axes_0, beta = blocks_3_attn_ln_bias_to_fp16, epsilon = var_787_to_fp16, gamma = blocks_3_attn_ln_weight_to_fp16, x = x_57_cast_fp16)[name = string("op_799_cast_fp16")]; tensor var_810_to_fp16 = const()[name = string("op_810_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198799808)))]; tensor var_811_to_fp16 = const()[name = string("op_811_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200897024)))]; tensor linear_24_cast_fp16 = linear(bias = var_811_to_fp16, weight = var_810_to_fp16, x = var_799_cast_fp16)[name = string("linear_24_cast_fp16")]; tensor var_814_to_fp16 = const()[name = string("op_814_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200899136)))]; tensor linear_25_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_814_to_fp16, x = var_799_cast_fp16)[name = string("linear_25_cast_fp16")]; tensor var_818_to_fp16 = const()[name = string("op_818_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(202996352)))]; tensor var_819_to_fp16 = const()[name = string("op_819_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205093568)))]; tensor linear_26_cast_fp16 = linear(bias = var_819_to_fp16, weight = var_818_to_fp16, x = var_799_cast_fp16)[name = string("linear_26_cast_fp16")]; tensor var_821_shape_cast_fp16 = shape(x = linear_24_cast_fp16)[name = string("op_821_shape_cast_fp16")]; int32 gather_38_axis_0 = const()[name = string("gather_38_axis_0"), val = int32(0)]; int32 gather_38_batch_dims_0 = const()[name = string("gather_38_batch_dims_0"), val = int32(0)]; bool gather_38_validate_indices_0 = const()[name = string("gather_38_validate_indices_0"), val = bool(false)]; string var_821_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_821_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_38_to_uint16 = const()[name = string("select_38_to_uint16"), val = uint16(1)]; tensor var_821_shape_cast_fp16_to_uint16 = cast(dtype = var_821_shape_cast_fp16_to_uint16_dtype_0, x = var_821_shape_cast_fp16)[name = string("cast_288")]; uint16 gather_38_cast_uint16 = gather(axis = gather_38_axis_0, batch_dims = gather_38_batch_dims_0, indices = select_38_to_uint16, validate_indices = gather_38_validate_indices_0, x = var_821_shape_cast_fp16_to_uint16)[name = string("gather_38_cast_uint16")]; string gather_38_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_38_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_38_cast_uint16_to_int32 = cast(dtype = gather_38_cast_uint16_to_int32_dtype_0, x = gather_38_cast_uint16)[name = string("cast_287")]; int32 end_step_9 = add(x = offset, y = gather_38_cast_uint16_to_int32)[name = string("end_step_9")]; tensor expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor([0])]; tensor expand_dims_50 = const()[name = string("expand_dims_50"), val = tensor([0])]; tensor expand_dims_51_axes_0 = const()[name = string("expand_dims_51_axes_0"), val = tensor([0])]; tensor expand_dims_51 = expand_dims(axes = expand_dims_51_axes_0, x = end_step_9)[name = string("expand_dims_51")]; tensor concat_70_values0_0 = const()[name = string("concat_70_values0_0"), val = tensor([3])]; int32 concat_70_axis_0 = const()[name = string("concat_70_axis_0"), val = int32(0)]; bool concat_70_interleave_0 = const()[name = string("concat_70_interleave_0"), val = bool(false)]; tensor concat_70 = concat(axis = concat_70_axis_0, interleave = concat_70_interleave_0, values = (concat_70_values0_0, expand_dims_48, expand_dims_1, expand_dims_50))[name = string("concat_70")]; tensor concat_71_values0_0 = const()[name = string("concat_71_values0_0"), val = tensor([0])]; tensor concat_71_values1_0 = const()[name = string("concat_71_values1_0"), val = tensor([0])]; tensor concat_71_values3_0 = const()[name = string("concat_71_values3_0"), val = tensor([0])]; int32 concat_71_axis_0 = const()[name = string("concat_71_axis_0"), val = int32(0)]; bool concat_71_interleave_0 = const()[name = string("concat_71_interleave_0"), val = bool(false)]; tensor concat_71 = concat(axis = concat_71_axis_0, interleave = concat_71_interleave_0, values = (concat_71_values0_0, concat_71_values1_0, expand_dims_51, concat_71_values3_0))[name = string("concat_71")]; tensor k_cache1_internal_tensor_assign_4_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_4_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_4_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_70, begin_mask = k_cache1_internal_tensor_assign_4_begin_mask_0, end = concat_71, end_mask = k_cache1_internal_tensor_assign_4_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_4_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_4_stride_0, update = linear_25_cast_fp16, x = coreml_update_state_52)[name = string("k_cache1_internal_tensor_assign_4_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_4_cast_fp16, input = k_cache1)[name = string("coreml_update_state_54_write_state")]; tensor coreml_update_state_54 = read_state(input = k_cache1)[name = string("coreml_update_state_54")]; tensor v_cache1_internal_tensor_assign_4_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_4_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_4_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_70, begin_mask = v_cache1_internal_tensor_assign_4_begin_mask_0, end = concat_71, end_mask = v_cache1_internal_tensor_assign_4_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_4_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_4_stride_0, update = linear_26_cast_fp16, x = coreml_update_state_53)[name = string("v_cache1_internal_tensor_assign_4_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_4_cast_fp16, input = v_cache1)[name = string("coreml_update_state_55_write_state")]; tensor coreml_update_state_55 = read_state(input = v_cache1)[name = string("coreml_update_state_55")]; int32 concat_76_values0_0 = const()[name = string("concat_76_values0_0"), val = int32(1)]; int32 concat_76_values2_0 = const()[name = string("concat_76_values2_0"), val = int32(1024)]; int32 concat_76_axis_0 = const()[name = string("concat_76_axis_0"), val = int32(0)]; bool concat_76_interleave_0 = const()[name = string("concat_76_interleave_0"), val = bool(false)]; tensor concat_76 = concat(axis = concat_76_axis_0, interleave = concat_76_interleave_0, values = (concat_76_values0_0, end_step_9, concat_76_values2_0))[name = string("concat_76")]; tensor var_837_begin_0 = const()[name = string("op_837_begin_0"), val = tensor([0, 0, 0])]; tensor var_837_end_mask_0 = const()[name = string("op_837_end_mask_0"), val = tensor([true, false, true])]; tensor var_837_cast_fp16 = slice_by_index(begin = var_837_begin_0, end = concat_76, end_mask = var_837_end_mask_0, x = k_cache_13_cast_fp16)[name = string("op_837_cast_fp16")]; tensor var_840_begin_0 = const()[name = string("op_840_begin_0"), val = tensor([0, 0, 0])]; tensor var_840_end_mask_0 = const()[name = string("op_840_end_mask_0"), val = tensor([true, false, true])]; tensor var_840_cast_fp16 = slice_by_index(begin = var_840_begin_0, end = concat_76, end_mask = var_840_end_mask_0, x = v_cache_13_cast_fp16)[name = string("op_840_cast_fp16")]; tensor concat_78x = const()[name = string("concat_78x"), val = tensor([1, -1, 16, 64])]; tensor var_850_cast_fp16 = reshape(shape = concat_78x, x = linear_24_cast_fp16)[name = string("op_850_cast_fp16")]; tensor const_132_to_fp16 = const()[name = string("const_132_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_27_cast_fp16 = mul(x = var_850_cast_fp16, y = const_132_to_fp16)[name = string("q_27_cast_fp16")]; tensor concat_79x = const()[name = string("concat_79x"), val = tensor([1, -1, 16, 64])]; tensor var_857_cast_fp16 = reshape(shape = concat_79x, x = var_837_cast_fp16)[name = string("op_857_cast_fp16")]; tensor const_133_to_fp16 = const()[name = string("const_133_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_35_cast_fp16 = mul(x = var_857_cast_fp16, y = const_133_to_fp16)[name = string("k_35_cast_fp16")]; tensor concat_80x = const()[name = string("concat_80x"), val = tensor([1, -1, 16, 64])]; tensor var_864_cast_fp16 = reshape(shape = concat_80x, x = var_840_cast_fp16)[name = string("op_864_cast_fp16")]; tensor var_865 = const()[name = string("op_865"), val = tensor([0, 2, 1, 3])]; bool qk_19_transpose_x_0 = const()[name = string("qk_19_transpose_x_0"), val = bool(false)]; bool qk_19_transpose_y_0 = const()[name = string("qk_19_transpose_y_0"), val = bool(false)]; tensor transpose_205_perm_0 = const()[name = string("transpose_205_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_206_perm_0 = const()[name = string("transpose_206_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_206 = transpose(perm = transpose_206_perm_0, x = k_35_cast_fp16)[name = string("transpose_454")]; tensor transpose_205 = transpose(perm = transpose_205_perm_0, x = q_27_cast_fp16)[name = string("transpose_455")]; tensor qk_19_cast_fp16 = matmul(transpose_x = qk_19_transpose_x_0, transpose_y = qk_19_transpose_y_0, x = transpose_205, y = transpose_206)[name = string("qk_19_cast_fp16")]; int32 concat_81_values1_0 = const()[name = string("concat_81_values1_0"), val = int32(448)]; int32 concat_81_axis_0 = const()[name = string("concat_81_axis_0"), val = int32(0)]; bool concat_81_interleave_0 = const()[name = string("concat_81_interleave_0"), val = bool(false)]; tensor concat_81 = concat(axis = concat_81_axis_0, interleave = concat_81_interleave_0, values = (gather_38_cast_uint16_to_int32, concat_81_values1_0))[name = string("concat_81")]; tensor var_868_begin_0 = const()[name = string("op_868_begin_0"), val = tensor([0, 0])]; tensor var_868_end_mask_0 = const()[name = string("op_868_end_mask_0"), val = tensor([false, true])]; tensor var_868_cast_fp16 = slice_by_index(begin = var_868_begin_0, end = concat_81, end_mask = var_868_end_mask_0, x = mask_to_fp16)[name = string("op_868_cast_fp16")]; int32 concat_82_values0_0 = const()[name = string("concat_82_values0_0"), val = int32(0)]; int32 concat_82_axis_0 = const()[name = string("concat_82_axis_0"), val = int32(0)]; bool concat_82_interleave_0 = const()[name = string("concat_82_interleave_0"), val = bool(false)]; tensor concat_82 = concat(axis = concat_82_axis_0, interleave = concat_82_interleave_0, values = (concat_82_values0_0, gather_38_cast_uint16_to_int32))[name = string("concat_82")]; tensor var_869_begin_0 = const()[name = string("op_869_begin_0"), val = tensor([0, 0])]; tensor var_869_end_mask_0 = const()[name = string("op_869_end_mask_0"), val = tensor([true, false])]; tensor var_869_cast_fp16 = slice_by_index(begin = var_869_begin_0, end = concat_82, end_mask = var_869_end_mask_0, x = var_868_cast_fp16)[name = string("op_869_cast_fp16")]; tensor qk_21_cast_fp16 = add(x = qk_19_cast_fp16, y = var_869_cast_fp16)[name = string("qk_21_cast_fp16")]; tensor var_872_cast_fp16 = softmax(axis = var_781, x = qk_21_cast_fp16)[name = string("op_872_cast_fp16")]; bool var_874_transpose_x_0 = const()[name = string("op_874_transpose_x_0"), val = bool(false)]; bool var_874_transpose_y_0 = const()[name = string("op_874_transpose_y_0"), val = bool(false)]; tensor v_35_cast_fp16 = transpose(perm = var_865, x = var_864_cast_fp16)[name = string("transpose_456")]; tensor var_874_cast_fp16 = matmul(transpose_x = var_874_transpose_x_0, transpose_y = var_874_transpose_y_0, x = var_872_cast_fp16, y = v_35_cast_fp16)[name = string("op_874_cast_fp16")]; tensor var_875 = const()[name = string("op_875"), val = tensor([0, 2, 1, 3])]; tensor concat_83x = const()[name = string("concat_83x"), val = tensor([1, -1, 1024])]; tensor var_876_cast_fp16 = transpose(perm = var_875, x = var_874_cast_fp16)[name = string("transpose_453")]; tensor x_61_cast_fp16 = reshape(shape = concat_83x, x = var_876_cast_fp16)[name = string("x_61_cast_fp16")]; tensor var_880_to_fp16 = const()[name = string("op_880_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205095680)))]; tensor var_881_to_fp16 = const()[name = string("op_881_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(207192896)))]; tensor linear_27_cast_fp16 = linear(bias = var_881_to_fp16, weight = var_880_to_fp16, x = x_61_cast_fp16)[name = string("linear_27_cast_fp16")]; tensor x_63_cast_fp16 = add(x = x_57_cast_fp16, y = linear_27_cast_fp16)[name = string("x_63_cast_fp16")]; tensor var_888_axes_0 = const()[name = string("op_888_axes_0"), val = tensor([-1])]; tensor blocks_3_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_3_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(207195008)))]; tensor blocks_3_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_3_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(207197120)))]; tensor var_888_cast_fp16 = layer_norm(axes = var_888_axes_0, beta = blocks_3_cross_attn_ln_bias_to_fp16, epsilon = var_787_to_fp16, gamma = blocks_3_cross_attn_ln_weight_to_fp16, x = x_63_cast_fp16)[name = string("op_888_cast_fp16")]; tensor var_897_to_fp16 = const()[name = string("op_897_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(207199232)))]; tensor var_898_to_fp16 = const()[name = string("op_898_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(209296448)))]; tensor linear_28_cast_fp16 = linear(bias = var_898_to_fp16, weight = var_897_to_fp16, x = var_888_cast_fp16)[name = string("linear_28_cast_fp16")]; tensor concat_84 = const()[name = string("concat_84"), val = tensor([0, 0, 0])]; tensor concat_85 = const()[name = string("concat_85"), val = tensor([0, 1500, 0])]; tensor k_37_internal_tensor_assign_1_stride_0 = const()[name = string("k_37_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_37_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_37_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_37_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_37_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_84, begin_mask = k_37_internal_tensor_assign_1_begin_mask_0, end = concat_85, end_mask = k_37_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_37_internal_tensor_assign_1_squeeze_mask_0, stride = k_37_internal_tensor_assign_1_stride_0, update = k_cache_15_cast_fp16, x = k_7_to_fp16)[name = string("k_37_internal_tensor_assign_1_cast_fp16")]; tensor concat_86 = const()[name = string("concat_86"), val = tensor([0, 0, 0])]; tensor concat_87 = const()[name = string("concat_87"), val = tensor([0, 1500, 0])]; tensor v_37_internal_tensor_assign_1_stride_0 = const()[name = string("v_37_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_37_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_37_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_37_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_37_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_86, begin_mask = v_37_internal_tensor_assign_1_begin_mask_0, end = concat_87, end_mask = v_37_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_37_internal_tensor_assign_1_squeeze_mask_0, stride = v_37_internal_tensor_assign_1_stride_0, update = v_cache_15_cast_fp16, x = k_7_to_fp16)[name = string("v_37_internal_tensor_assign_1_cast_fp16")]; tensor concat_88x = const()[name = string("concat_88x"), val = tensor([1, -1, 16, 64])]; tensor var_918_cast_fp16 = reshape(shape = concat_88x, x = linear_28_cast_fp16)[name = string("op_918_cast_fp16")]; tensor const_134_to_fp16 = const()[name = string("const_134_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_31_cast_fp16 = mul(x = var_918_cast_fp16, y = const_134_to_fp16)[name = string("q_31_cast_fp16")]; tensor var_924 = const()[name = string("op_924"), val = tensor([1, 1500, 16, -1])]; tensor var_925_cast_fp16 = reshape(shape = var_924, x = k_37_internal_tensor_assign_1_cast_fp16)[name = string("op_925_cast_fp16")]; tensor const_135_to_fp16 = const()[name = string("const_135_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_39_cast_fp16 = mul(x = var_925_cast_fp16, y = const_135_to_fp16)[name = string("k_39_cast_fp16")]; tensor var_931 = const()[name = string("op_931"), val = tensor([1, 1500, 16, -1])]; tensor var_932_cast_fp16 = reshape(shape = var_931, x = v_37_internal_tensor_assign_1_cast_fp16)[name = string("op_932_cast_fp16")]; tensor var_933 = const()[name = string("op_933"), val = tensor([0, 2, 1, 3])]; bool qk_23_transpose_x_0 = const()[name = string("qk_23_transpose_x_0"), val = bool(false)]; bool qk_23_transpose_y_0 = const()[name = string("qk_23_transpose_y_0"), val = bool(false)]; tensor transpose_207_perm_0 = const()[name = string("transpose_207_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_208_perm_0 = const()[name = string("transpose_208_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_208 = transpose(perm = transpose_208_perm_0, x = k_39_cast_fp16)[name = string("transpose_450")]; tensor transpose_207 = transpose(perm = transpose_207_perm_0, x = q_31_cast_fp16)[name = string("transpose_451")]; tensor qk_23_cast_fp16 = matmul(transpose_x = qk_23_transpose_x_0, transpose_y = qk_23_transpose_y_0, x = transpose_207, y = transpose_208)[name = string("qk_23_cast_fp16")]; tensor var_937_cast_fp16 = softmax(axis = var_781, x = qk_23_cast_fp16)[name = string("op_937_cast_fp16")]; bool var_939_transpose_x_0 = const()[name = string("op_939_transpose_x_0"), val = bool(false)]; bool var_939_transpose_y_0 = const()[name = string("op_939_transpose_y_0"), val = bool(false)]; tensor v_39_cast_fp16 = transpose(perm = var_933, x = var_932_cast_fp16)[name = string("transpose_452")]; tensor var_939_cast_fp16 = matmul(transpose_x = var_939_transpose_x_0, transpose_y = var_939_transpose_y_0, x = var_937_cast_fp16, y = v_39_cast_fp16)[name = string("op_939_cast_fp16")]; tensor var_940 = const()[name = string("op_940"), val = tensor([0, 2, 1, 3])]; tensor concat_89x = const()[name = string("concat_89x"), val = tensor([1, -1, 1024])]; tensor var_941_cast_fp16 = transpose(perm = var_940, x = var_939_cast_fp16)[name = string("transpose_449")]; tensor x_67_cast_fp16 = reshape(shape = concat_89x, x = var_941_cast_fp16)[name = string("x_67_cast_fp16")]; tensor var_945_to_fp16 = const()[name = string("op_945_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(209298560)))]; tensor var_946_to_fp16 = const()[name = string("op_946_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211395776)))]; tensor linear_29_cast_fp16 = linear(bias = var_946_to_fp16, weight = var_945_to_fp16, x = x_67_cast_fp16)[name = string("linear_29_cast_fp16")]; tensor x_69_cast_fp16 = add(x = x_63_cast_fp16, y = linear_29_cast_fp16)[name = string("x_69_cast_fp16")]; tensor var_953_axes_0 = const()[name = string("op_953_axes_0"), val = tensor([-1])]; tensor blocks_3_mlp_ln_weight_to_fp16 = const()[name = string("blocks_3_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211397888)))]; tensor blocks_3_mlp_ln_bias_to_fp16 = const()[name = string("blocks_3_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211400000)))]; tensor var_953_cast_fp16 = layer_norm(axes = var_953_axes_0, beta = blocks_3_mlp_ln_bias_to_fp16, epsilon = var_787_to_fp16, gamma = blocks_3_mlp_ln_weight_to_fp16, x = x_69_cast_fp16)[name = string("op_953_cast_fp16")]; tensor var_962_to_fp16 = const()[name = string("op_962_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211402112)))]; tensor var_963_to_fp16 = const()[name = string("op_963_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219790784)))]; tensor linear_30_cast_fp16 = linear(bias = var_963_to_fp16, weight = var_962_to_fp16, x = var_953_cast_fp16)[name = string("linear_30_cast_fp16")]; string x_73_mode_0 = const()[name = string("x_73_mode_0"), val = string("EXACT")]; tensor x_73_cast_fp16 = gelu(mode = x_73_mode_0, x = linear_30_cast_fp16)[name = string("x_73_cast_fp16")]; tensor var_968_to_fp16 = const()[name = string("op_968_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219799040)))]; tensor var_969_to_fp16 = const()[name = string("op_969_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(228187712)))]; tensor linear_31_cast_fp16 = linear(bias = var_969_to_fp16, weight = var_968_to_fp16, x = x_73_cast_fp16)[name = string("linear_31_cast_fp16")]; tensor x_75_cast_fp16 = add(x = x_69_cast_fp16, y = linear_31_cast_fp16)[name = string("x_75_cast_fp16")]; tensor k_cache_17_begin_0 = const()[name = string("k_cache_17_begin_0"), val = tensor([4, 0, 0, 0])]; tensor k_cache_17_end_0 = const()[name = string("k_cache_17_end_0"), val = tensor([5, 1, 448, 1024])]; tensor k_cache_17_end_mask_0 = const()[name = string("k_cache_17_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_17_squeeze_mask_0 = const()[name = string("k_cache_17_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_17_cast_fp16 = slice_by_index(begin = k_cache_17_begin_0, end = k_cache_17_end_0, end_mask = k_cache_17_end_mask_0, squeeze_mask = k_cache_17_squeeze_mask_0, x = coreml_update_state_54)[name = string("k_cache_17_cast_fp16")]; tensor v_cache_17_begin_0 = const()[name = string("v_cache_17_begin_0"), val = tensor([4, 0, 0, 0])]; tensor v_cache_17_end_0 = const()[name = string("v_cache_17_end_0"), val = tensor([5, 1, 448, 1024])]; tensor v_cache_17_end_mask_0 = const()[name = string("v_cache_17_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_17_squeeze_mask_0 = const()[name = string("v_cache_17_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_17_cast_fp16 = slice_by_index(begin = v_cache_17_begin_0, end = v_cache_17_end_0, end_mask = v_cache_17_end_mask_0, squeeze_mask = v_cache_17_squeeze_mask_0, x = coreml_update_state_55)[name = string("v_cache_17_cast_fp16")]; tensor k_cache_19_begin_0 = const()[name = string("k_cache_19_begin_0"), val = tensor([4, 0, 0, 0])]; tensor k_cache_19_end_0 = const()[name = string("k_cache_19_end_0"), val = tensor([5, 1, 1500, 1024])]; tensor k_cache_19_end_mask_0 = const()[name = string("k_cache_19_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_19_squeeze_mask_0 = const()[name = string("k_cache_19_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_19_cast_fp16 = slice_by_index(begin = k_cache_19_begin_0, end = k_cache_19_end_0, end_mask = k_cache_19_end_mask_0, squeeze_mask = k_cache_19_squeeze_mask_0, x = read_state_2)[name = string("k_cache_19_cast_fp16")]; tensor v_cache_19_begin_0 = const()[name = string("v_cache_19_begin_0"), val = tensor([4, 0, 0, 0])]; tensor v_cache_19_end_0 = const()[name = string("v_cache_19_end_0"), val = tensor([5, 1, 1500, 1024])]; tensor v_cache_19_end_mask_0 = const()[name = string("v_cache_19_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_19_squeeze_mask_0 = const()[name = string("v_cache_19_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_19_cast_fp16 = slice_by_index(begin = v_cache_19_begin_0, end = v_cache_19_end_0, end_mask = v_cache_19_end_mask_0, squeeze_mask = v_cache_19_squeeze_mask_0, x = read_state_3)[name = string("v_cache_19_cast_fp16")]; int32 var_992 = const()[name = string("op_992"), val = int32(-1)]; tensor var_1010_axes_0 = const()[name = string("op_1010_axes_0"), val = tensor([-1])]; tensor blocks_4_attn_ln_weight_to_fp16 = const()[name = string("blocks_4_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(228189824)))]; tensor blocks_4_attn_ln_bias_to_fp16 = const()[name = string("blocks_4_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(228191936)))]; fp16 var_998_to_fp16 = const()[name = string("op_998_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_1010_cast_fp16 = layer_norm(axes = var_1010_axes_0, beta = blocks_4_attn_ln_bias_to_fp16, epsilon = var_998_to_fp16, gamma = blocks_4_attn_ln_weight_to_fp16, x = x_75_cast_fp16)[name = string("op_1010_cast_fp16")]; tensor var_1021_to_fp16 = const()[name = string("op_1021_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(228194048)))]; tensor var_1022_to_fp16 = const()[name = string("op_1022_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(230291264)))]; tensor linear_32_cast_fp16 = linear(bias = var_1022_to_fp16, weight = var_1021_to_fp16, x = var_1010_cast_fp16)[name = string("linear_32_cast_fp16")]; tensor var_1025_to_fp16 = const()[name = string("op_1025_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(230293376)))]; tensor linear_33_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1025_to_fp16, x = var_1010_cast_fp16)[name = string("linear_33_cast_fp16")]; tensor var_1029_to_fp16 = const()[name = string("op_1029_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(232390592)))]; tensor var_1030_to_fp16 = const()[name = string("op_1030_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(234487808)))]; tensor linear_34_cast_fp16 = linear(bias = var_1030_to_fp16, weight = var_1029_to_fp16, x = var_1010_cast_fp16)[name = string("linear_34_cast_fp16")]; tensor var_1032_shape_cast_fp16 = shape(x = linear_32_cast_fp16)[name = string("op_1032_shape_cast_fp16")]; int32 gather_50_axis_0 = const()[name = string("gather_50_axis_0"), val = int32(0)]; int32 gather_50_batch_dims_0 = const()[name = string("gather_50_batch_dims_0"), val = int32(0)]; bool gather_50_validate_indices_0 = const()[name = string("gather_50_validate_indices_0"), val = bool(false)]; string var_1032_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1032_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_50_to_uint16 = const()[name = string("select_50_to_uint16"), val = uint16(1)]; tensor var_1032_shape_cast_fp16_to_uint16 = cast(dtype = var_1032_shape_cast_fp16_to_uint16_dtype_0, x = var_1032_shape_cast_fp16)[name = string("cast_286")]; uint16 gather_50_cast_uint16 = gather(axis = gather_50_axis_0, batch_dims = gather_50_batch_dims_0, indices = select_50_to_uint16, validate_indices = gather_50_validate_indices_0, x = var_1032_shape_cast_fp16_to_uint16)[name = string("gather_50_cast_uint16")]; string gather_50_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_50_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_50_cast_uint16_to_int32 = cast(dtype = gather_50_cast_uint16_to_int32_dtype_0, x = gather_50_cast_uint16)[name = string("cast_285")]; int32 end_step_11 = add(x = offset, y = gather_50_cast_uint16_to_int32)[name = string("end_step_11")]; tensor expand_dims_64 = const()[name = string("expand_dims_64"), val = tensor([0])]; tensor expand_dims_66 = const()[name = string("expand_dims_66"), val = tensor([0])]; tensor expand_dims_67_axes_0 = const()[name = string("expand_dims_67_axes_0"), val = tensor([0])]; tensor expand_dims_67 = expand_dims(axes = expand_dims_67_axes_0, x = end_step_11)[name = string("expand_dims_67")]; tensor concat_92_values0_0 = const()[name = string("concat_92_values0_0"), val = tensor([4])]; int32 concat_92_axis_0 = const()[name = string("concat_92_axis_0"), val = int32(0)]; bool concat_92_interleave_0 = const()[name = string("concat_92_interleave_0"), val = bool(false)]; tensor concat_92 = concat(axis = concat_92_axis_0, interleave = concat_92_interleave_0, values = (concat_92_values0_0, expand_dims_64, expand_dims_1, expand_dims_66))[name = string("concat_92")]; tensor concat_93_values0_0 = const()[name = string("concat_93_values0_0"), val = tensor([0])]; tensor concat_93_values1_0 = const()[name = string("concat_93_values1_0"), val = tensor([0])]; tensor concat_93_values3_0 = const()[name = string("concat_93_values3_0"), val = tensor([0])]; int32 concat_93_axis_0 = const()[name = string("concat_93_axis_0"), val = int32(0)]; bool concat_93_interleave_0 = const()[name = string("concat_93_interleave_0"), val = bool(false)]; tensor concat_93 = concat(axis = concat_93_axis_0, interleave = concat_93_interleave_0, values = (concat_93_values0_0, concat_93_values1_0, expand_dims_67, concat_93_values3_0))[name = string("concat_93")]; tensor k_cache1_internal_tensor_assign_5_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_5_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_5_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_92, begin_mask = k_cache1_internal_tensor_assign_5_begin_mask_0, end = concat_93, end_mask = k_cache1_internal_tensor_assign_5_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_5_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_5_stride_0, update = linear_33_cast_fp16, x = coreml_update_state_54)[name = string("k_cache1_internal_tensor_assign_5_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_5_cast_fp16, input = k_cache1)[name = string("coreml_update_state_56_write_state")]; tensor coreml_update_state_56 = read_state(input = k_cache1)[name = string("coreml_update_state_56")]; tensor v_cache1_internal_tensor_assign_5_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_5_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_5_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_92, begin_mask = v_cache1_internal_tensor_assign_5_begin_mask_0, end = concat_93, end_mask = v_cache1_internal_tensor_assign_5_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_5_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_5_stride_0, update = linear_34_cast_fp16, x = coreml_update_state_55)[name = string("v_cache1_internal_tensor_assign_5_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_5_cast_fp16, input = v_cache1)[name = string("coreml_update_state_57_write_state")]; tensor coreml_update_state_57 = read_state(input = v_cache1)[name = string("coreml_update_state_57")]; int32 concat_98_values0_0 = const()[name = string("concat_98_values0_0"), val = int32(1)]; int32 concat_98_values2_0 = const()[name = string("concat_98_values2_0"), val = int32(1024)]; int32 concat_98_axis_0 = const()[name = string("concat_98_axis_0"), val = int32(0)]; bool concat_98_interleave_0 = const()[name = string("concat_98_interleave_0"), val = bool(false)]; tensor concat_98 = concat(axis = concat_98_axis_0, interleave = concat_98_interleave_0, values = (concat_98_values0_0, end_step_11, concat_98_values2_0))[name = string("concat_98")]; tensor var_1048_begin_0 = const()[name = string("op_1048_begin_0"), val = tensor([0, 0, 0])]; tensor var_1048_end_mask_0 = const()[name = string("op_1048_end_mask_0"), val = tensor([true, false, true])]; tensor var_1048_cast_fp16 = slice_by_index(begin = var_1048_begin_0, end = concat_98, end_mask = var_1048_end_mask_0, x = k_cache_17_cast_fp16)[name = string("op_1048_cast_fp16")]; tensor var_1051_begin_0 = const()[name = string("op_1051_begin_0"), val = tensor([0, 0, 0])]; tensor var_1051_end_mask_0 = const()[name = string("op_1051_end_mask_0"), val = tensor([true, false, true])]; tensor var_1051_cast_fp16 = slice_by_index(begin = var_1051_begin_0, end = concat_98, end_mask = var_1051_end_mask_0, x = v_cache_17_cast_fp16)[name = string("op_1051_cast_fp16")]; tensor concat_100x = const()[name = string("concat_100x"), val = tensor([1, -1, 16, 64])]; tensor var_1061_cast_fp16 = reshape(shape = concat_100x, x = linear_32_cast_fp16)[name = string("op_1061_cast_fp16")]; tensor const_136_to_fp16 = const()[name = string("const_136_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_35_cast_fp16 = mul(x = var_1061_cast_fp16, y = const_136_to_fp16)[name = string("q_35_cast_fp16")]; tensor concat_101x = const()[name = string("concat_101x"), val = tensor([1, -1, 16, 64])]; tensor var_1068_cast_fp16 = reshape(shape = concat_101x, x = var_1048_cast_fp16)[name = string("op_1068_cast_fp16")]; tensor const_137_to_fp16 = const()[name = string("const_137_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_45_cast_fp16 = mul(x = var_1068_cast_fp16, y = const_137_to_fp16)[name = string("k_45_cast_fp16")]; tensor concat_102x = const()[name = string("concat_102x"), val = tensor([1, -1, 16, 64])]; tensor var_1075_cast_fp16 = reshape(shape = concat_102x, x = var_1051_cast_fp16)[name = string("op_1075_cast_fp16")]; tensor var_1076 = const()[name = string("op_1076"), val = tensor([0, 2, 1, 3])]; bool qk_25_transpose_x_0 = const()[name = string("qk_25_transpose_x_0"), val = bool(false)]; bool qk_25_transpose_y_0 = const()[name = string("qk_25_transpose_y_0"), val = bool(false)]; tensor transpose_209_perm_0 = const()[name = string("transpose_209_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_210_perm_0 = const()[name = string("transpose_210_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_210 = transpose(perm = transpose_210_perm_0, x = k_45_cast_fp16)[name = string("transpose_446")]; tensor transpose_209 = transpose(perm = transpose_209_perm_0, x = q_35_cast_fp16)[name = string("transpose_447")]; tensor qk_25_cast_fp16 = matmul(transpose_x = qk_25_transpose_x_0, transpose_y = qk_25_transpose_y_0, x = transpose_209, y = transpose_210)[name = string("qk_25_cast_fp16")]; int32 concat_103_values1_0 = const()[name = string("concat_103_values1_0"), val = int32(448)]; int32 concat_103_axis_0 = const()[name = string("concat_103_axis_0"), val = int32(0)]; bool concat_103_interleave_0 = const()[name = string("concat_103_interleave_0"), val = bool(false)]; tensor concat_103 = concat(axis = concat_103_axis_0, interleave = concat_103_interleave_0, values = (gather_50_cast_uint16_to_int32, concat_103_values1_0))[name = string("concat_103")]; tensor var_1079_begin_0 = const()[name = string("op_1079_begin_0"), val = tensor([0, 0])]; tensor var_1079_end_mask_0 = const()[name = string("op_1079_end_mask_0"), val = tensor([false, true])]; tensor var_1079_cast_fp16 = slice_by_index(begin = var_1079_begin_0, end = concat_103, end_mask = var_1079_end_mask_0, x = mask_to_fp16)[name = string("op_1079_cast_fp16")]; int32 concat_104_values0_0 = const()[name = string("concat_104_values0_0"), val = int32(0)]; int32 concat_104_axis_0 = const()[name = string("concat_104_axis_0"), val = int32(0)]; bool concat_104_interleave_0 = const()[name = string("concat_104_interleave_0"), val = bool(false)]; tensor concat_104 = concat(axis = concat_104_axis_0, interleave = concat_104_interleave_0, values = (concat_104_values0_0, gather_50_cast_uint16_to_int32))[name = string("concat_104")]; tensor var_1080_begin_0 = const()[name = string("op_1080_begin_0"), val = tensor([0, 0])]; tensor var_1080_end_mask_0 = const()[name = string("op_1080_end_mask_0"), val = tensor([true, false])]; tensor var_1080_cast_fp16 = slice_by_index(begin = var_1080_begin_0, end = concat_104, end_mask = var_1080_end_mask_0, x = var_1079_cast_fp16)[name = string("op_1080_cast_fp16")]; tensor qk_27_cast_fp16 = add(x = qk_25_cast_fp16, y = var_1080_cast_fp16)[name = string("qk_27_cast_fp16")]; tensor var_1083_cast_fp16 = softmax(axis = var_992, x = qk_27_cast_fp16)[name = string("op_1083_cast_fp16")]; bool var_1085_transpose_x_0 = const()[name = string("op_1085_transpose_x_0"), val = bool(false)]; bool var_1085_transpose_y_0 = const()[name = string("op_1085_transpose_y_0"), val = bool(false)]; tensor v_45_cast_fp16 = transpose(perm = var_1076, x = var_1075_cast_fp16)[name = string("transpose_448")]; tensor var_1085_cast_fp16 = matmul(transpose_x = var_1085_transpose_x_0, transpose_y = var_1085_transpose_y_0, x = var_1083_cast_fp16, y = v_45_cast_fp16)[name = string("op_1085_cast_fp16")]; tensor var_1086 = const()[name = string("op_1086"), val = tensor([0, 2, 1, 3])]; tensor concat_105x = const()[name = string("concat_105x"), val = tensor([1, -1, 1024])]; tensor var_1087_cast_fp16 = transpose(perm = var_1086, x = var_1085_cast_fp16)[name = string("transpose_445")]; tensor x_79_cast_fp16 = reshape(shape = concat_105x, x = var_1087_cast_fp16)[name = string("x_79_cast_fp16")]; tensor var_1091_to_fp16 = const()[name = string("op_1091_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(234489920)))]; tensor var_1092_to_fp16 = const()[name = string("op_1092_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236587136)))]; tensor linear_35_cast_fp16 = linear(bias = var_1092_to_fp16, weight = var_1091_to_fp16, x = x_79_cast_fp16)[name = string("linear_35_cast_fp16")]; tensor x_81_cast_fp16 = add(x = x_75_cast_fp16, y = linear_35_cast_fp16)[name = string("x_81_cast_fp16")]; tensor var_1099_axes_0 = const()[name = string("op_1099_axes_0"), val = tensor([-1])]; tensor blocks_4_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_4_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236589248)))]; tensor blocks_4_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_4_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236591360)))]; tensor var_1099_cast_fp16 = layer_norm(axes = var_1099_axes_0, beta = blocks_4_cross_attn_ln_bias_to_fp16, epsilon = var_998_to_fp16, gamma = blocks_4_cross_attn_ln_weight_to_fp16, x = x_81_cast_fp16)[name = string("op_1099_cast_fp16")]; tensor var_1108_to_fp16 = const()[name = string("op_1108_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236593472)))]; tensor var_1109_to_fp16 = const()[name = string("op_1109_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(238690688)))]; tensor linear_36_cast_fp16 = linear(bias = var_1109_to_fp16, weight = var_1108_to_fp16, x = var_1099_cast_fp16)[name = string("linear_36_cast_fp16")]; tensor concat_106 = const()[name = string("concat_106"), val = tensor([0, 0, 0])]; tensor concat_107 = const()[name = string("concat_107"), val = tensor([0, 1500, 0])]; tensor k_47_internal_tensor_assign_1_stride_0 = const()[name = string("k_47_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_47_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_47_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_47_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_47_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_47_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_47_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_47_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_106, begin_mask = k_47_internal_tensor_assign_1_begin_mask_0, end = concat_107, end_mask = k_47_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_47_internal_tensor_assign_1_squeeze_mask_0, stride = k_47_internal_tensor_assign_1_stride_0, update = k_cache_19_cast_fp16, x = k_7_to_fp16)[name = string("k_47_internal_tensor_assign_1_cast_fp16")]; tensor concat_108 = const()[name = string("concat_108"), val = tensor([0, 0, 0])]; tensor concat_109 = const()[name = string("concat_109"), val = tensor([0, 1500, 0])]; tensor v_47_internal_tensor_assign_1_stride_0 = const()[name = string("v_47_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_47_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_47_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_47_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_47_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_47_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_47_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_47_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_108, begin_mask = v_47_internal_tensor_assign_1_begin_mask_0, end = concat_109, end_mask = v_47_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_47_internal_tensor_assign_1_squeeze_mask_0, stride = v_47_internal_tensor_assign_1_stride_0, update = v_cache_19_cast_fp16, x = k_7_to_fp16)[name = string("v_47_internal_tensor_assign_1_cast_fp16")]; tensor concat_110x = const()[name = string("concat_110x"), val = tensor([1, -1, 16, 64])]; tensor var_1129_cast_fp16 = reshape(shape = concat_110x, x = linear_36_cast_fp16)[name = string("op_1129_cast_fp16")]; tensor const_138_to_fp16 = const()[name = string("const_138_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_39_cast_fp16 = mul(x = var_1129_cast_fp16, y = const_138_to_fp16)[name = string("q_39_cast_fp16")]; tensor var_1135 = const()[name = string("op_1135"), val = tensor([1, 1500, 16, -1])]; tensor var_1136_cast_fp16 = reshape(shape = var_1135, x = k_47_internal_tensor_assign_1_cast_fp16)[name = string("op_1136_cast_fp16")]; tensor const_139_to_fp16 = const()[name = string("const_139_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_49_cast_fp16 = mul(x = var_1136_cast_fp16, y = const_139_to_fp16)[name = string("k_49_cast_fp16")]; tensor var_1142 = const()[name = string("op_1142"), val = tensor([1, 1500, 16, -1])]; tensor var_1143_cast_fp16 = reshape(shape = var_1142, x = v_47_internal_tensor_assign_1_cast_fp16)[name = string("op_1143_cast_fp16")]; tensor var_1144 = const()[name = string("op_1144"), val = tensor([0, 2, 1, 3])]; bool qk_29_transpose_x_0 = const()[name = string("qk_29_transpose_x_0"), val = bool(false)]; bool qk_29_transpose_y_0 = const()[name = string("qk_29_transpose_y_0"), val = bool(false)]; tensor transpose_211_perm_0 = const()[name = string("transpose_211_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_212_perm_0 = const()[name = string("transpose_212_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_212 = transpose(perm = transpose_212_perm_0, x = k_49_cast_fp16)[name = string("transpose_442")]; tensor transpose_211 = transpose(perm = transpose_211_perm_0, x = q_39_cast_fp16)[name = string("transpose_443")]; tensor qk_29_cast_fp16 = matmul(transpose_x = qk_29_transpose_x_0, transpose_y = qk_29_transpose_y_0, x = transpose_211, y = transpose_212)[name = string("qk_29_cast_fp16")]; tensor var_1148_cast_fp16 = softmax(axis = var_992, x = qk_29_cast_fp16)[name = string("op_1148_cast_fp16")]; bool var_1150_transpose_x_0 = const()[name = string("op_1150_transpose_x_0"), val = bool(false)]; bool var_1150_transpose_y_0 = const()[name = string("op_1150_transpose_y_0"), val = bool(false)]; tensor v_49_cast_fp16 = transpose(perm = var_1144, x = var_1143_cast_fp16)[name = string("transpose_444")]; tensor var_1150_cast_fp16 = matmul(transpose_x = var_1150_transpose_x_0, transpose_y = var_1150_transpose_y_0, x = var_1148_cast_fp16, y = v_49_cast_fp16)[name = string("op_1150_cast_fp16")]; tensor var_1151 = const()[name = string("op_1151"), val = tensor([0, 2, 1, 3])]; tensor concat_111x = const()[name = string("concat_111x"), val = tensor([1, -1, 1024])]; tensor var_1152_cast_fp16 = transpose(perm = var_1151, x = var_1150_cast_fp16)[name = string("transpose_441")]; tensor x_85_cast_fp16 = reshape(shape = concat_111x, x = var_1152_cast_fp16)[name = string("x_85_cast_fp16")]; tensor var_1156_to_fp16 = const()[name = string("op_1156_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(238692800)))]; tensor var_1157_to_fp16 = const()[name = string("op_1157_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240790016)))]; tensor linear_37_cast_fp16 = linear(bias = var_1157_to_fp16, weight = var_1156_to_fp16, x = x_85_cast_fp16)[name = string("linear_37_cast_fp16")]; tensor x_87_cast_fp16 = add(x = x_81_cast_fp16, y = linear_37_cast_fp16)[name = string("x_87_cast_fp16")]; tensor var_1164_axes_0 = const()[name = string("op_1164_axes_0"), val = tensor([-1])]; tensor blocks_4_mlp_ln_weight_to_fp16 = const()[name = string("blocks_4_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240792128)))]; tensor blocks_4_mlp_ln_bias_to_fp16 = const()[name = string("blocks_4_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240794240)))]; tensor var_1164_cast_fp16 = layer_norm(axes = var_1164_axes_0, beta = blocks_4_mlp_ln_bias_to_fp16, epsilon = var_998_to_fp16, gamma = blocks_4_mlp_ln_weight_to_fp16, x = x_87_cast_fp16)[name = string("op_1164_cast_fp16")]; tensor var_1173_to_fp16 = const()[name = string("op_1173_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240796352)))]; tensor var_1174_to_fp16 = const()[name = string("op_1174_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249185024)))]; tensor linear_38_cast_fp16 = linear(bias = var_1174_to_fp16, weight = var_1173_to_fp16, x = var_1164_cast_fp16)[name = string("linear_38_cast_fp16")]; string x_91_mode_0 = const()[name = string("x_91_mode_0"), val = string("EXACT")]; tensor x_91_cast_fp16 = gelu(mode = x_91_mode_0, x = linear_38_cast_fp16)[name = string("x_91_cast_fp16")]; tensor var_1179_to_fp16 = const()[name = string("op_1179_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249193280)))]; tensor var_1180_to_fp16 = const()[name = string("op_1180_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(257581952)))]; tensor linear_39_cast_fp16 = linear(bias = var_1180_to_fp16, weight = var_1179_to_fp16, x = x_91_cast_fp16)[name = string("linear_39_cast_fp16")]; tensor x_93_cast_fp16 = add(x = x_87_cast_fp16, y = linear_39_cast_fp16)[name = string("x_93_cast_fp16")]; tensor k_cache_21_begin_0 = const()[name = string("k_cache_21_begin_0"), val = tensor([5, 0, 0, 0])]; tensor k_cache_21_end_0 = const()[name = string("k_cache_21_end_0"), val = tensor([6, 1, 448, 1024])]; tensor k_cache_21_end_mask_0 = const()[name = string("k_cache_21_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_21_squeeze_mask_0 = const()[name = string("k_cache_21_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_21_cast_fp16 = slice_by_index(begin = k_cache_21_begin_0, end = k_cache_21_end_0, end_mask = k_cache_21_end_mask_0, squeeze_mask = k_cache_21_squeeze_mask_0, x = coreml_update_state_56)[name = string("k_cache_21_cast_fp16")]; tensor v_cache_21_begin_0 = const()[name = string("v_cache_21_begin_0"), val = tensor([5, 0, 0, 0])]; tensor v_cache_21_end_0 = const()[name = string("v_cache_21_end_0"), val = tensor([6, 1, 448, 1024])]; tensor v_cache_21_end_mask_0 = const()[name = string("v_cache_21_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_21_squeeze_mask_0 = const()[name = string("v_cache_21_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_21_cast_fp16 = slice_by_index(begin = v_cache_21_begin_0, end = v_cache_21_end_0, end_mask = v_cache_21_end_mask_0, squeeze_mask = v_cache_21_squeeze_mask_0, x = coreml_update_state_57)[name = string("v_cache_21_cast_fp16")]; tensor k_cache_23_begin_0 = const()[name = string("k_cache_23_begin_0"), val = tensor([5, 0, 0, 0])]; tensor k_cache_23_end_0 = const()[name = string("k_cache_23_end_0"), val = tensor([6, 1, 1500, 1024])]; tensor k_cache_23_end_mask_0 = const()[name = string("k_cache_23_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_23_squeeze_mask_0 = const()[name = string("k_cache_23_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_23_cast_fp16 = slice_by_index(begin = k_cache_23_begin_0, end = k_cache_23_end_0, end_mask = k_cache_23_end_mask_0, squeeze_mask = k_cache_23_squeeze_mask_0, x = read_state_2)[name = string("k_cache_23_cast_fp16")]; tensor v_cache_23_begin_0 = const()[name = string("v_cache_23_begin_0"), val = tensor([5, 0, 0, 0])]; tensor v_cache_23_end_0 = const()[name = string("v_cache_23_end_0"), val = tensor([6, 1, 1500, 1024])]; tensor v_cache_23_end_mask_0 = const()[name = string("v_cache_23_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_23_squeeze_mask_0 = const()[name = string("v_cache_23_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_23_cast_fp16 = slice_by_index(begin = v_cache_23_begin_0, end = v_cache_23_end_0, end_mask = v_cache_23_end_mask_0, squeeze_mask = v_cache_23_squeeze_mask_0, x = read_state_3)[name = string("v_cache_23_cast_fp16")]; int32 var_1203 = const()[name = string("op_1203"), val = int32(-1)]; tensor var_1221_axes_0 = const()[name = string("op_1221_axes_0"), val = tensor([-1])]; tensor blocks_5_attn_ln_weight_to_fp16 = const()[name = string("blocks_5_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(257584064)))]; tensor blocks_5_attn_ln_bias_to_fp16 = const()[name = string("blocks_5_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(257586176)))]; fp16 var_1209_to_fp16 = const()[name = string("op_1209_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_1221_cast_fp16 = layer_norm(axes = var_1221_axes_0, beta = blocks_5_attn_ln_bias_to_fp16, epsilon = var_1209_to_fp16, gamma = blocks_5_attn_ln_weight_to_fp16, x = x_93_cast_fp16)[name = string("op_1221_cast_fp16")]; tensor var_1232_to_fp16 = const()[name = string("op_1232_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(257588288)))]; tensor var_1233_to_fp16 = const()[name = string("op_1233_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(259685504)))]; tensor linear_40_cast_fp16 = linear(bias = var_1233_to_fp16, weight = var_1232_to_fp16, x = var_1221_cast_fp16)[name = string("linear_40_cast_fp16")]; tensor var_1236_to_fp16 = const()[name = string("op_1236_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(259687616)))]; tensor linear_41_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1236_to_fp16, x = var_1221_cast_fp16)[name = string("linear_41_cast_fp16")]; tensor var_1240_to_fp16 = const()[name = string("op_1240_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261784832)))]; tensor var_1241_to_fp16 = const()[name = string("op_1241_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(263882048)))]; tensor linear_42_cast_fp16 = linear(bias = var_1241_to_fp16, weight = var_1240_to_fp16, x = var_1221_cast_fp16)[name = string("linear_42_cast_fp16")]; tensor var_1243_shape_cast_fp16 = shape(x = linear_40_cast_fp16)[name = string("op_1243_shape_cast_fp16")]; int32 gather_62_axis_0 = const()[name = string("gather_62_axis_0"), val = int32(0)]; int32 gather_62_batch_dims_0 = const()[name = string("gather_62_batch_dims_0"), val = int32(0)]; bool gather_62_validate_indices_0 = const()[name = string("gather_62_validate_indices_0"), val = bool(false)]; string var_1243_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1243_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_62_to_uint16 = const()[name = string("select_62_to_uint16"), val = uint16(1)]; tensor var_1243_shape_cast_fp16_to_uint16 = cast(dtype = var_1243_shape_cast_fp16_to_uint16_dtype_0, x = var_1243_shape_cast_fp16)[name = string("cast_284")]; uint16 gather_62_cast_uint16 = gather(axis = gather_62_axis_0, batch_dims = gather_62_batch_dims_0, indices = select_62_to_uint16, validate_indices = gather_62_validate_indices_0, x = var_1243_shape_cast_fp16_to_uint16)[name = string("gather_62_cast_uint16")]; string gather_62_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_62_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_62_cast_uint16_to_int32 = cast(dtype = gather_62_cast_uint16_to_int32_dtype_0, x = gather_62_cast_uint16)[name = string("cast_283")]; int32 end_step_13 = add(x = offset, y = gather_62_cast_uint16_to_int32)[name = string("end_step_13")]; tensor expand_dims_80 = const()[name = string("expand_dims_80"), val = tensor([0])]; tensor expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor([0])]; tensor expand_dims_83_axes_0 = const()[name = string("expand_dims_83_axes_0"), val = tensor([0])]; tensor expand_dims_83 = expand_dims(axes = expand_dims_83_axes_0, x = end_step_13)[name = string("expand_dims_83")]; tensor concat_114_values0_0 = const()[name = string("concat_114_values0_0"), val = tensor([5])]; int32 concat_114_axis_0 = const()[name = string("concat_114_axis_0"), val = int32(0)]; bool concat_114_interleave_0 = const()[name = string("concat_114_interleave_0"), val = bool(false)]; tensor concat_114 = concat(axis = concat_114_axis_0, interleave = concat_114_interleave_0, values = (concat_114_values0_0, expand_dims_80, expand_dims_1, expand_dims_82))[name = string("concat_114")]; tensor concat_115_values0_0 = const()[name = string("concat_115_values0_0"), val = tensor([0])]; tensor concat_115_values1_0 = const()[name = string("concat_115_values1_0"), val = tensor([0])]; tensor concat_115_values3_0 = const()[name = string("concat_115_values3_0"), val = tensor([0])]; int32 concat_115_axis_0 = const()[name = string("concat_115_axis_0"), val = int32(0)]; bool concat_115_interleave_0 = const()[name = string("concat_115_interleave_0"), val = bool(false)]; tensor concat_115 = concat(axis = concat_115_axis_0, interleave = concat_115_interleave_0, values = (concat_115_values0_0, concat_115_values1_0, expand_dims_83, concat_115_values3_0))[name = string("concat_115")]; tensor k_cache1_internal_tensor_assign_6_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_6_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_6_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_114, begin_mask = k_cache1_internal_tensor_assign_6_begin_mask_0, end = concat_115, end_mask = k_cache1_internal_tensor_assign_6_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_6_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_6_stride_0, update = linear_41_cast_fp16, x = coreml_update_state_56)[name = string("k_cache1_internal_tensor_assign_6_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_6_cast_fp16, input = k_cache1)[name = string("coreml_update_state_58_write_state")]; tensor coreml_update_state_58 = read_state(input = k_cache1)[name = string("coreml_update_state_58")]; tensor v_cache1_internal_tensor_assign_6_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_6_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_6_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_114, begin_mask = v_cache1_internal_tensor_assign_6_begin_mask_0, end = concat_115, end_mask = v_cache1_internal_tensor_assign_6_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_6_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_6_stride_0, update = linear_42_cast_fp16, x = coreml_update_state_57)[name = string("v_cache1_internal_tensor_assign_6_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_6_cast_fp16, input = v_cache1)[name = string("coreml_update_state_59_write_state")]; tensor coreml_update_state_59 = read_state(input = v_cache1)[name = string("coreml_update_state_59")]; int32 concat_120_values0_0 = const()[name = string("concat_120_values0_0"), val = int32(1)]; int32 concat_120_values2_0 = const()[name = string("concat_120_values2_0"), val = int32(1024)]; int32 concat_120_axis_0 = const()[name = string("concat_120_axis_0"), val = int32(0)]; bool concat_120_interleave_0 = const()[name = string("concat_120_interleave_0"), val = bool(false)]; tensor concat_120 = concat(axis = concat_120_axis_0, interleave = concat_120_interleave_0, values = (concat_120_values0_0, end_step_13, concat_120_values2_0))[name = string("concat_120")]; tensor var_1259_begin_0 = const()[name = string("op_1259_begin_0"), val = tensor([0, 0, 0])]; tensor var_1259_end_mask_0 = const()[name = string("op_1259_end_mask_0"), val = tensor([true, false, true])]; tensor var_1259_cast_fp16 = slice_by_index(begin = var_1259_begin_0, end = concat_120, end_mask = var_1259_end_mask_0, x = k_cache_21_cast_fp16)[name = string("op_1259_cast_fp16")]; tensor var_1262_begin_0 = const()[name = string("op_1262_begin_0"), val = tensor([0, 0, 0])]; tensor var_1262_end_mask_0 = const()[name = string("op_1262_end_mask_0"), val = tensor([true, false, true])]; tensor var_1262_cast_fp16 = slice_by_index(begin = var_1262_begin_0, end = concat_120, end_mask = var_1262_end_mask_0, x = v_cache_21_cast_fp16)[name = string("op_1262_cast_fp16")]; tensor concat_122x = const()[name = string("concat_122x"), val = tensor([1, -1, 16, 64])]; tensor var_1272_cast_fp16 = reshape(shape = concat_122x, x = linear_40_cast_fp16)[name = string("op_1272_cast_fp16")]; tensor const_140_to_fp16 = const()[name = string("const_140_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_43_cast_fp16 = mul(x = var_1272_cast_fp16, y = const_140_to_fp16)[name = string("q_43_cast_fp16")]; tensor concat_123x = const()[name = string("concat_123x"), val = tensor([1, -1, 16, 64])]; tensor var_1279_cast_fp16 = reshape(shape = concat_123x, x = var_1259_cast_fp16)[name = string("op_1279_cast_fp16")]; tensor const_141_to_fp16 = const()[name = string("const_141_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_55_cast_fp16 = mul(x = var_1279_cast_fp16, y = const_141_to_fp16)[name = string("k_55_cast_fp16")]; tensor concat_124x = const()[name = string("concat_124x"), val = tensor([1, -1, 16, 64])]; tensor var_1286_cast_fp16 = reshape(shape = concat_124x, x = var_1262_cast_fp16)[name = string("op_1286_cast_fp16")]; tensor var_1287 = const()[name = string("op_1287"), val = tensor([0, 2, 1, 3])]; bool qk_31_transpose_x_0 = const()[name = string("qk_31_transpose_x_0"), val = bool(false)]; bool qk_31_transpose_y_0 = const()[name = string("qk_31_transpose_y_0"), val = bool(false)]; tensor transpose_213_perm_0 = const()[name = string("transpose_213_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_214_perm_0 = const()[name = string("transpose_214_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_214 = transpose(perm = transpose_214_perm_0, x = k_55_cast_fp16)[name = string("transpose_438")]; tensor transpose_213 = transpose(perm = transpose_213_perm_0, x = q_43_cast_fp16)[name = string("transpose_439")]; tensor qk_31_cast_fp16 = matmul(transpose_x = qk_31_transpose_x_0, transpose_y = qk_31_transpose_y_0, x = transpose_213, y = transpose_214)[name = string("qk_31_cast_fp16")]; int32 concat_125_values1_0 = const()[name = string("concat_125_values1_0"), val = int32(448)]; int32 concat_125_axis_0 = const()[name = string("concat_125_axis_0"), val = int32(0)]; bool concat_125_interleave_0 = const()[name = string("concat_125_interleave_0"), val = bool(false)]; tensor concat_125 = concat(axis = concat_125_axis_0, interleave = concat_125_interleave_0, values = (gather_62_cast_uint16_to_int32, concat_125_values1_0))[name = string("concat_125")]; tensor var_1290_begin_0 = const()[name = string("op_1290_begin_0"), val = tensor([0, 0])]; tensor var_1290_end_mask_0 = const()[name = string("op_1290_end_mask_0"), val = tensor([false, true])]; tensor var_1290_cast_fp16 = slice_by_index(begin = var_1290_begin_0, end = concat_125, end_mask = var_1290_end_mask_0, x = mask_to_fp16)[name = string("op_1290_cast_fp16")]; int32 concat_126_values0_0 = const()[name = string("concat_126_values0_0"), val = int32(0)]; int32 concat_126_axis_0 = const()[name = string("concat_126_axis_0"), val = int32(0)]; bool concat_126_interleave_0 = const()[name = string("concat_126_interleave_0"), val = bool(false)]; tensor concat_126 = concat(axis = concat_126_axis_0, interleave = concat_126_interleave_0, values = (concat_126_values0_0, gather_62_cast_uint16_to_int32))[name = string("concat_126")]; tensor var_1291_begin_0 = const()[name = string("op_1291_begin_0"), val = tensor([0, 0])]; tensor var_1291_end_mask_0 = const()[name = string("op_1291_end_mask_0"), val = tensor([true, false])]; tensor var_1291_cast_fp16 = slice_by_index(begin = var_1291_begin_0, end = concat_126, end_mask = var_1291_end_mask_0, x = var_1290_cast_fp16)[name = string("op_1291_cast_fp16")]; tensor qk_33_cast_fp16 = add(x = qk_31_cast_fp16, y = var_1291_cast_fp16)[name = string("qk_33_cast_fp16")]; tensor var_1294_cast_fp16 = softmax(axis = var_1203, x = qk_33_cast_fp16)[name = string("op_1294_cast_fp16")]; bool var_1296_transpose_x_0 = const()[name = string("op_1296_transpose_x_0"), val = bool(false)]; bool var_1296_transpose_y_0 = const()[name = string("op_1296_transpose_y_0"), val = bool(false)]; tensor v_55_cast_fp16 = transpose(perm = var_1287, x = var_1286_cast_fp16)[name = string("transpose_440")]; tensor var_1296_cast_fp16 = matmul(transpose_x = var_1296_transpose_x_0, transpose_y = var_1296_transpose_y_0, x = var_1294_cast_fp16, y = v_55_cast_fp16)[name = string("op_1296_cast_fp16")]; tensor var_1297 = const()[name = string("op_1297"), val = tensor([0, 2, 1, 3])]; tensor concat_127x = const()[name = string("concat_127x"), val = tensor([1, -1, 1024])]; tensor var_1298_cast_fp16 = transpose(perm = var_1297, x = var_1296_cast_fp16)[name = string("transpose_437")]; tensor x_97_cast_fp16 = reshape(shape = concat_127x, x = var_1298_cast_fp16)[name = string("x_97_cast_fp16")]; tensor var_1302_to_fp16 = const()[name = string("op_1302_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(263884160)))]; tensor var_1303_to_fp16 = const()[name = string("op_1303_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265981376)))]; tensor linear_43_cast_fp16 = linear(bias = var_1303_to_fp16, weight = var_1302_to_fp16, x = x_97_cast_fp16)[name = string("linear_43_cast_fp16")]; tensor x_99_cast_fp16 = add(x = x_93_cast_fp16, y = linear_43_cast_fp16)[name = string("x_99_cast_fp16")]; tensor var_1310_axes_0 = const()[name = string("op_1310_axes_0"), val = tensor([-1])]; tensor blocks_5_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_5_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265983488)))]; tensor blocks_5_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_5_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265985600)))]; tensor var_1310_cast_fp16 = layer_norm(axes = var_1310_axes_0, beta = blocks_5_cross_attn_ln_bias_to_fp16, epsilon = var_1209_to_fp16, gamma = blocks_5_cross_attn_ln_weight_to_fp16, x = x_99_cast_fp16)[name = string("op_1310_cast_fp16")]; tensor var_1319_to_fp16 = const()[name = string("op_1319_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265987712)))]; tensor var_1320_to_fp16 = const()[name = string("op_1320_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(268084928)))]; tensor linear_44_cast_fp16 = linear(bias = var_1320_to_fp16, weight = var_1319_to_fp16, x = var_1310_cast_fp16)[name = string("linear_44_cast_fp16")]; tensor concat_128 = const()[name = string("concat_128"), val = tensor([0, 0, 0])]; tensor concat_129 = const()[name = string("concat_129"), val = tensor([0, 1500, 0])]; tensor k_57_internal_tensor_assign_1_stride_0 = const()[name = string("k_57_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_57_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_57_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_57_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_57_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_57_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_57_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_57_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_128, begin_mask = k_57_internal_tensor_assign_1_begin_mask_0, end = concat_129, end_mask = k_57_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_57_internal_tensor_assign_1_squeeze_mask_0, stride = k_57_internal_tensor_assign_1_stride_0, update = k_cache_23_cast_fp16, x = k_7_to_fp16)[name = string("k_57_internal_tensor_assign_1_cast_fp16")]; tensor concat_130 = const()[name = string("concat_130"), val = tensor([0, 0, 0])]; tensor concat_131 = const()[name = string("concat_131"), val = tensor([0, 1500, 0])]; tensor v_57_internal_tensor_assign_1_stride_0 = const()[name = string("v_57_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_57_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_57_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_57_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_57_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_57_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_57_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_57_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_130, begin_mask = v_57_internal_tensor_assign_1_begin_mask_0, end = concat_131, end_mask = v_57_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_57_internal_tensor_assign_1_squeeze_mask_0, stride = v_57_internal_tensor_assign_1_stride_0, update = v_cache_23_cast_fp16, x = k_7_to_fp16)[name = string("v_57_internal_tensor_assign_1_cast_fp16")]; tensor concat_132x = const()[name = string("concat_132x"), val = tensor([1, -1, 16, 64])]; tensor var_1340_cast_fp16 = reshape(shape = concat_132x, x = linear_44_cast_fp16)[name = string("op_1340_cast_fp16")]; tensor const_142_to_fp16 = const()[name = string("const_142_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_47_cast_fp16 = mul(x = var_1340_cast_fp16, y = const_142_to_fp16)[name = string("q_47_cast_fp16")]; tensor var_1346 = const()[name = string("op_1346"), val = tensor([1, 1500, 16, -1])]; tensor var_1347_cast_fp16 = reshape(shape = var_1346, x = k_57_internal_tensor_assign_1_cast_fp16)[name = string("op_1347_cast_fp16")]; tensor const_143_to_fp16 = const()[name = string("const_143_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_59_cast_fp16 = mul(x = var_1347_cast_fp16, y = const_143_to_fp16)[name = string("k_59_cast_fp16")]; tensor var_1353 = const()[name = string("op_1353"), val = tensor([1, 1500, 16, -1])]; tensor var_1354_cast_fp16 = reshape(shape = var_1353, x = v_57_internal_tensor_assign_1_cast_fp16)[name = string("op_1354_cast_fp16")]; tensor var_1355 = const()[name = string("op_1355"), val = tensor([0, 2, 1, 3])]; bool qk_35_transpose_x_0 = const()[name = string("qk_35_transpose_x_0"), val = bool(false)]; bool qk_35_transpose_y_0 = const()[name = string("qk_35_transpose_y_0"), val = bool(false)]; tensor transpose_215_perm_0 = const()[name = string("transpose_215_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_216_perm_0 = const()[name = string("transpose_216_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_216 = transpose(perm = transpose_216_perm_0, x = k_59_cast_fp16)[name = string("transpose_434")]; tensor transpose_215 = transpose(perm = transpose_215_perm_0, x = q_47_cast_fp16)[name = string("transpose_435")]; tensor qk_35_cast_fp16 = matmul(transpose_x = qk_35_transpose_x_0, transpose_y = qk_35_transpose_y_0, x = transpose_215, y = transpose_216)[name = string("qk_35_cast_fp16")]; tensor var_1359_cast_fp16 = softmax(axis = var_1203, x = qk_35_cast_fp16)[name = string("op_1359_cast_fp16")]; bool var_1361_transpose_x_0 = const()[name = string("op_1361_transpose_x_0"), val = bool(false)]; bool var_1361_transpose_y_0 = const()[name = string("op_1361_transpose_y_0"), val = bool(false)]; tensor v_59_cast_fp16 = transpose(perm = var_1355, x = var_1354_cast_fp16)[name = string("transpose_436")]; tensor var_1361_cast_fp16 = matmul(transpose_x = var_1361_transpose_x_0, transpose_y = var_1361_transpose_y_0, x = var_1359_cast_fp16, y = v_59_cast_fp16)[name = string("op_1361_cast_fp16")]; tensor var_1362 = const()[name = string("op_1362"), val = tensor([0, 2, 1, 3])]; tensor concat_133x = const()[name = string("concat_133x"), val = tensor([1, -1, 1024])]; tensor var_1363_cast_fp16 = transpose(perm = var_1362, x = var_1361_cast_fp16)[name = string("transpose_433")]; tensor x_103_cast_fp16 = reshape(shape = concat_133x, x = var_1363_cast_fp16)[name = string("x_103_cast_fp16")]; tensor var_1367_to_fp16 = const()[name = string("op_1367_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(268087040)))]; tensor var_1368_to_fp16 = const()[name = string("op_1368_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270184256)))]; tensor linear_45_cast_fp16 = linear(bias = var_1368_to_fp16, weight = var_1367_to_fp16, x = x_103_cast_fp16)[name = string("linear_45_cast_fp16")]; tensor x_105_cast_fp16 = add(x = x_99_cast_fp16, y = linear_45_cast_fp16)[name = string("x_105_cast_fp16")]; tensor var_1375_axes_0 = const()[name = string("op_1375_axes_0"), val = tensor([-1])]; tensor blocks_5_mlp_ln_weight_to_fp16 = const()[name = string("blocks_5_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270186368)))]; tensor blocks_5_mlp_ln_bias_to_fp16 = const()[name = string("blocks_5_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270188480)))]; tensor var_1375_cast_fp16 = layer_norm(axes = var_1375_axes_0, beta = blocks_5_mlp_ln_bias_to_fp16, epsilon = var_1209_to_fp16, gamma = blocks_5_mlp_ln_weight_to_fp16, x = x_105_cast_fp16)[name = string("op_1375_cast_fp16")]; tensor var_1384_to_fp16 = const()[name = string("op_1384_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270190592)))]; tensor var_1385_to_fp16 = const()[name = string("op_1385_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(278579264)))]; tensor linear_46_cast_fp16 = linear(bias = var_1385_to_fp16, weight = var_1384_to_fp16, x = var_1375_cast_fp16)[name = string("linear_46_cast_fp16")]; string x_109_mode_0 = const()[name = string("x_109_mode_0"), val = string("EXACT")]; tensor x_109_cast_fp16 = gelu(mode = x_109_mode_0, x = linear_46_cast_fp16)[name = string("x_109_cast_fp16")]; tensor var_1390_to_fp16 = const()[name = string("op_1390_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(278587520)))]; tensor var_1391_to_fp16 = const()[name = string("op_1391_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(286976192)))]; tensor linear_47_cast_fp16 = linear(bias = var_1391_to_fp16, weight = var_1390_to_fp16, x = x_109_cast_fp16)[name = string("linear_47_cast_fp16")]; tensor x_111_cast_fp16 = add(x = x_105_cast_fp16, y = linear_47_cast_fp16)[name = string("x_111_cast_fp16")]; tensor k_cache_25_begin_0 = const()[name = string("k_cache_25_begin_0"), val = tensor([6, 0, 0, 0])]; tensor k_cache_25_end_0 = const()[name = string("k_cache_25_end_0"), val = tensor([7, 1, 448, 1024])]; tensor k_cache_25_end_mask_0 = const()[name = string("k_cache_25_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_25_squeeze_mask_0 = const()[name = string("k_cache_25_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_25_cast_fp16 = slice_by_index(begin = k_cache_25_begin_0, end = k_cache_25_end_0, end_mask = k_cache_25_end_mask_0, squeeze_mask = k_cache_25_squeeze_mask_0, x = coreml_update_state_58)[name = string("k_cache_25_cast_fp16")]; tensor v_cache_25_begin_0 = const()[name = string("v_cache_25_begin_0"), val = tensor([6, 0, 0, 0])]; tensor v_cache_25_end_0 = const()[name = string("v_cache_25_end_0"), val = tensor([7, 1, 448, 1024])]; tensor v_cache_25_end_mask_0 = const()[name = string("v_cache_25_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_25_squeeze_mask_0 = const()[name = string("v_cache_25_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_25_cast_fp16 = slice_by_index(begin = v_cache_25_begin_0, end = v_cache_25_end_0, end_mask = v_cache_25_end_mask_0, squeeze_mask = v_cache_25_squeeze_mask_0, x = coreml_update_state_59)[name = string("v_cache_25_cast_fp16")]; tensor k_cache_27_begin_0 = const()[name = string("k_cache_27_begin_0"), val = tensor([6, 0, 0, 0])]; tensor k_cache_27_end_0 = const()[name = string("k_cache_27_end_0"), val = tensor([7, 1, 1500, 1024])]; tensor k_cache_27_end_mask_0 = const()[name = string("k_cache_27_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_27_squeeze_mask_0 = const()[name = string("k_cache_27_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_27_cast_fp16 = slice_by_index(begin = k_cache_27_begin_0, end = k_cache_27_end_0, end_mask = k_cache_27_end_mask_0, squeeze_mask = k_cache_27_squeeze_mask_0, x = read_state_2)[name = string("k_cache_27_cast_fp16")]; tensor v_cache_27_begin_0 = const()[name = string("v_cache_27_begin_0"), val = tensor([6, 0, 0, 0])]; tensor v_cache_27_end_0 = const()[name = string("v_cache_27_end_0"), val = tensor([7, 1, 1500, 1024])]; tensor v_cache_27_end_mask_0 = const()[name = string("v_cache_27_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_27_squeeze_mask_0 = const()[name = string("v_cache_27_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_27_cast_fp16 = slice_by_index(begin = v_cache_27_begin_0, end = v_cache_27_end_0, end_mask = v_cache_27_end_mask_0, squeeze_mask = v_cache_27_squeeze_mask_0, x = read_state_3)[name = string("v_cache_27_cast_fp16")]; int32 var_1414 = const()[name = string("op_1414"), val = int32(-1)]; tensor var_1432_axes_0 = const()[name = string("op_1432_axes_0"), val = tensor([-1])]; tensor blocks_6_attn_ln_weight_to_fp16 = const()[name = string("blocks_6_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(286978304)))]; tensor blocks_6_attn_ln_bias_to_fp16 = const()[name = string("blocks_6_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(286980416)))]; fp16 var_1420_to_fp16 = const()[name = string("op_1420_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_1432_cast_fp16 = layer_norm(axes = var_1432_axes_0, beta = blocks_6_attn_ln_bias_to_fp16, epsilon = var_1420_to_fp16, gamma = blocks_6_attn_ln_weight_to_fp16, x = x_111_cast_fp16)[name = string("op_1432_cast_fp16")]; tensor var_1443_to_fp16 = const()[name = string("op_1443_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(286982528)))]; tensor var_1444_to_fp16 = const()[name = string("op_1444_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(289079744)))]; tensor linear_48_cast_fp16 = linear(bias = var_1444_to_fp16, weight = var_1443_to_fp16, x = var_1432_cast_fp16)[name = string("linear_48_cast_fp16")]; tensor var_1447_to_fp16 = const()[name = string("op_1447_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(289081856)))]; tensor linear_49_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1447_to_fp16, x = var_1432_cast_fp16)[name = string("linear_49_cast_fp16")]; tensor var_1451_to_fp16 = const()[name = string("op_1451_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(291179072)))]; tensor var_1452_to_fp16 = const()[name = string("op_1452_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(293276288)))]; tensor linear_50_cast_fp16 = linear(bias = var_1452_to_fp16, weight = var_1451_to_fp16, x = var_1432_cast_fp16)[name = string("linear_50_cast_fp16")]; tensor var_1454_shape_cast_fp16 = shape(x = linear_48_cast_fp16)[name = string("op_1454_shape_cast_fp16")]; int32 gather_74_axis_0 = const()[name = string("gather_74_axis_0"), val = int32(0)]; int32 gather_74_batch_dims_0 = const()[name = string("gather_74_batch_dims_0"), val = int32(0)]; bool gather_74_validate_indices_0 = const()[name = string("gather_74_validate_indices_0"), val = bool(false)]; string var_1454_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1454_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_74_to_uint16 = const()[name = string("select_74_to_uint16"), val = uint16(1)]; tensor var_1454_shape_cast_fp16_to_uint16 = cast(dtype = var_1454_shape_cast_fp16_to_uint16_dtype_0, x = var_1454_shape_cast_fp16)[name = string("cast_282")]; uint16 gather_74_cast_uint16 = gather(axis = gather_74_axis_0, batch_dims = gather_74_batch_dims_0, indices = select_74_to_uint16, validate_indices = gather_74_validate_indices_0, x = var_1454_shape_cast_fp16_to_uint16)[name = string("gather_74_cast_uint16")]; string gather_74_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_74_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_74_cast_uint16_to_int32 = cast(dtype = gather_74_cast_uint16_to_int32_dtype_0, x = gather_74_cast_uint16)[name = string("cast_281")]; int32 end_step_15 = add(x = offset, y = gather_74_cast_uint16_to_int32)[name = string("end_step_15")]; tensor expand_dims_96 = const()[name = string("expand_dims_96"), val = tensor([0])]; tensor expand_dims_98 = const()[name = string("expand_dims_98"), val = tensor([0])]; tensor expand_dims_99_axes_0 = const()[name = string("expand_dims_99_axes_0"), val = tensor([0])]; tensor expand_dims_99 = expand_dims(axes = expand_dims_99_axes_0, x = end_step_15)[name = string("expand_dims_99")]; tensor concat_136_values0_0 = const()[name = string("concat_136_values0_0"), val = tensor([6])]; int32 concat_136_axis_0 = const()[name = string("concat_136_axis_0"), val = int32(0)]; bool concat_136_interleave_0 = const()[name = string("concat_136_interleave_0"), val = bool(false)]; tensor concat_136 = concat(axis = concat_136_axis_0, interleave = concat_136_interleave_0, values = (concat_136_values0_0, expand_dims_96, expand_dims_1, expand_dims_98))[name = string("concat_136")]; tensor concat_137_values0_0 = const()[name = string("concat_137_values0_0"), val = tensor([0])]; tensor concat_137_values1_0 = const()[name = string("concat_137_values1_0"), val = tensor([0])]; tensor concat_137_values3_0 = const()[name = string("concat_137_values3_0"), val = tensor([0])]; int32 concat_137_axis_0 = const()[name = string("concat_137_axis_0"), val = int32(0)]; bool concat_137_interleave_0 = const()[name = string("concat_137_interleave_0"), val = bool(false)]; tensor concat_137 = concat(axis = concat_137_axis_0, interleave = concat_137_interleave_0, values = (concat_137_values0_0, concat_137_values1_0, expand_dims_99, concat_137_values3_0))[name = string("concat_137")]; tensor k_cache1_internal_tensor_assign_7_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_7_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_7_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_136, begin_mask = k_cache1_internal_tensor_assign_7_begin_mask_0, end = concat_137, end_mask = k_cache1_internal_tensor_assign_7_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_7_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_7_stride_0, update = linear_49_cast_fp16, x = coreml_update_state_58)[name = string("k_cache1_internal_tensor_assign_7_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_7_cast_fp16, input = k_cache1)[name = string("coreml_update_state_60_write_state")]; tensor coreml_update_state_60 = read_state(input = k_cache1)[name = string("coreml_update_state_60")]; tensor v_cache1_internal_tensor_assign_7_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_7_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_7_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_136, begin_mask = v_cache1_internal_tensor_assign_7_begin_mask_0, end = concat_137, end_mask = v_cache1_internal_tensor_assign_7_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_7_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_7_stride_0, update = linear_50_cast_fp16, x = coreml_update_state_59)[name = string("v_cache1_internal_tensor_assign_7_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_7_cast_fp16, input = v_cache1)[name = string("coreml_update_state_61_write_state")]; tensor coreml_update_state_61 = read_state(input = v_cache1)[name = string("coreml_update_state_61")]; int32 concat_142_values0_0 = const()[name = string("concat_142_values0_0"), val = int32(1)]; int32 concat_142_values2_0 = const()[name = string("concat_142_values2_0"), val = int32(1024)]; int32 concat_142_axis_0 = const()[name = string("concat_142_axis_0"), val = int32(0)]; bool concat_142_interleave_0 = const()[name = string("concat_142_interleave_0"), val = bool(false)]; tensor concat_142 = concat(axis = concat_142_axis_0, interleave = concat_142_interleave_0, values = (concat_142_values0_0, end_step_15, concat_142_values2_0))[name = string("concat_142")]; tensor var_1470_begin_0 = const()[name = string("op_1470_begin_0"), val = tensor([0, 0, 0])]; tensor var_1470_end_mask_0 = const()[name = string("op_1470_end_mask_0"), val = tensor([true, false, true])]; tensor var_1470_cast_fp16 = slice_by_index(begin = var_1470_begin_0, end = concat_142, end_mask = var_1470_end_mask_0, x = k_cache_25_cast_fp16)[name = string("op_1470_cast_fp16")]; tensor var_1473_begin_0 = const()[name = string("op_1473_begin_0"), val = tensor([0, 0, 0])]; tensor var_1473_end_mask_0 = const()[name = string("op_1473_end_mask_0"), val = tensor([true, false, true])]; tensor var_1473_cast_fp16 = slice_by_index(begin = var_1473_begin_0, end = concat_142, end_mask = var_1473_end_mask_0, x = v_cache_25_cast_fp16)[name = string("op_1473_cast_fp16")]; tensor concat_144x = const()[name = string("concat_144x"), val = tensor([1, -1, 16, 64])]; tensor var_1483_cast_fp16 = reshape(shape = concat_144x, x = linear_48_cast_fp16)[name = string("op_1483_cast_fp16")]; tensor const_144_to_fp16 = const()[name = string("const_144_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_51_cast_fp16 = mul(x = var_1483_cast_fp16, y = const_144_to_fp16)[name = string("q_51_cast_fp16")]; tensor concat_145x = const()[name = string("concat_145x"), val = tensor([1, -1, 16, 64])]; tensor var_1490_cast_fp16 = reshape(shape = concat_145x, x = var_1470_cast_fp16)[name = string("op_1490_cast_fp16")]; tensor const_145_to_fp16 = const()[name = string("const_145_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_65_cast_fp16 = mul(x = var_1490_cast_fp16, y = const_145_to_fp16)[name = string("k_65_cast_fp16")]; tensor concat_146x = const()[name = string("concat_146x"), val = tensor([1, -1, 16, 64])]; tensor var_1497_cast_fp16 = reshape(shape = concat_146x, x = var_1473_cast_fp16)[name = string("op_1497_cast_fp16")]; tensor var_1498 = const()[name = string("op_1498"), val = tensor([0, 2, 1, 3])]; bool qk_37_transpose_x_0 = const()[name = string("qk_37_transpose_x_0"), val = bool(false)]; bool qk_37_transpose_y_0 = const()[name = string("qk_37_transpose_y_0"), val = bool(false)]; tensor transpose_217_perm_0 = const()[name = string("transpose_217_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_218_perm_0 = const()[name = string("transpose_218_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_218 = transpose(perm = transpose_218_perm_0, x = k_65_cast_fp16)[name = string("transpose_430")]; tensor transpose_217 = transpose(perm = transpose_217_perm_0, x = q_51_cast_fp16)[name = string("transpose_431")]; tensor qk_37_cast_fp16 = matmul(transpose_x = qk_37_transpose_x_0, transpose_y = qk_37_transpose_y_0, x = transpose_217, y = transpose_218)[name = string("qk_37_cast_fp16")]; int32 concat_147_values1_0 = const()[name = string("concat_147_values1_0"), val = int32(448)]; int32 concat_147_axis_0 = const()[name = string("concat_147_axis_0"), val = int32(0)]; bool concat_147_interleave_0 = const()[name = string("concat_147_interleave_0"), val = bool(false)]; tensor concat_147 = concat(axis = concat_147_axis_0, interleave = concat_147_interleave_0, values = (gather_74_cast_uint16_to_int32, concat_147_values1_0))[name = string("concat_147")]; tensor var_1501_begin_0 = const()[name = string("op_1501_begin_0"), val = tensor([0, 0])]; tensor var_1501_end_mask_0 = const()[name = string("op_1501_end_mask_0"), val = tensor([false, true])]; tensor var_1501_cast_fp16 = slice_by_index(begin = var_1501_begin_0, end = concat_147, end_mask = var_1501_end_mask_0, x = mask_to_fp16)[name = string("op_1501_cast_fp16")]; int32 concat_148_values0_0 = const()[name = string("concat_148_values0_0"), val = int32(0)]; int32 concat_148_axis_0 = const()[name = string("concat_148_axis_0"), val = int32(0)]; bool concat_148_interleave_0 = const()[name = string("concat_148_interleave_0"), val = bool(false)]; tensor concat_148 = concat(axis = concat_148_axis_0, interleave = concat_148_interleave_0, values = (concat_148_values0_0, gather_74_cast_uint16_to_int32))[name = string("concat_148")]; tensor var_1502_begin_0 = const()[name = string("op_1502_begin_0"), val = tensor([0, 0])]; tensor var_1502_end_mask_0 = const()[name = string("op_1502_end_mask_0"), val = tensor([true, false])]; tensor var_1502_cast_fp16 = slice_by_index(begin = var_1502_begin_0, end = concat_148, end_mask = var_1502_end_mask_0, x = var_1501_cast_fp16)[name = string("op_1502_cast_fp16")]; tensor qk_39_cast_fp16 = add(x = qk_37_cast_fp16, y = var_1502_cast_fp16)[name = string("qk_39_cast_fp16")]; tensor var_1505_cast_fp16 = softmax(axis = var_1414, x = qk_39_cast_fp16)[name = string("op_1505_cast_fp16")]; bool var_1507_transpose_x_0 = const()[name = string("op_1507_transpose_x_0"), val = bool(false)]; bool var_1507_transpose_y_0 = const()[name = string("op_1507_transpose_y_0"), val = bool(false)]; tensor v_65_cast_fp16 = transpose(perm = var_1498, x = var_1497_cast_fp16)[name = string("transpose_432")]; tensor var_1507_cast_fp16 = matmul(transpose_x = var_1507_transpose_x_0, transpose_y = var_1507_transpose_y_0, x = var_1505_cast_fp16, y = v_65_cast_fp16)[name = string("op_1507_cast_fp16")]; tensor var_1508 = const()[name = string("op_1508"), val = tensor([0, 2, 1, 3])]; tensor concat_149x = const()[name = string("concat_149x"), val = tensor([1, -1, 1024])]; tensor var_1509_cast_fp16 = transpose(perm = var_1508, x = var_1507_cast_fp16)[name = string("transpose_429")]; tensor x_115_cast_fp16 = reshape(shape = concat_149x, x = var_1509_cast_fp16)[name = string("x_115_cast_fp16")]; tensor var_1513_to_fp16 = const()[name = string("op_1513_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(293278400)))]; tensor var_1514_to_fp16 = const()[name = string("op_1514_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295375616)))]; tensor linear_51_cast_fp16 = linear(bias = var_1514_to_fp16, weight = var_1513_to_fp16, x = x_115_cast_fp16)[name = string("linear_51_cast_fp16")]; tensor x_117_cast_fp16 = add(x = x_111_cast_fp16, y = linear_51_cast_fp16)[name = string("x_117_cast_fp16")]; tensor var_1521_axes_0 = const()[name = string("op_1521_axes_0"), val = tensor([-1])]; tensor blocks_6_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_6_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295377728)))]; tensor blocks_6_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_6_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295379840)))]; tensor var_1521_cast_fp16 = layer_norm(axes = var_1521_axes_0, beta = blocks_6_cross_attn_ln_bias_to_fp16, epsilon = var_1420_to_fp16, gamma = blocks_6_cross_attn_ln_weight_to_fp16, x = x_117_cast_fp16)[name = string("op_1521_cast_fp16")]; tensor var_1530_to_fp16 = const()[name = string("op_1530_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295381952)))]; tensor var_1531_to_fp16 = const()[name = string("op_1531_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(297479168)))]; tensor linear_52_cast_fp16 = linear(bias = var_1531_to_fp16, weight = var_1530_to_fp16, x = var_1521_cast_fp16)[name = string("linear_52_cast_fp16")]; tensor concat_150 = const()[name = string("concat_150"), val = tensor([0, 0, 0])]; tensor concat_151 = const()[name = string("concat_151"), val = tensor([0, 1500, 0])]; tensor k_67_internal_tensor_assign_1_stride_0 = const()[name = string("k_67_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_67_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_67_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_67_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_67_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_67_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_67_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_67_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_150, begin_mask = k_67_internal_tensor_assign_1_begin_mask_0, end = concat_151, end_mask = k_67_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_67_internal_tensor_assign_1_squeeze_mask_0, stride = k_67_internal_tensor_assign_1_stride_0, update = k_cache_27_cast_fp16, x = k_7_to_fp16)[name = string("k_67_internal_tensor_assign_1_cast_fp16")]; tensor concat_152 = const()[name = string("concat_152"), val = tensor([0, 0, 0])]; tensor concat_153 = const()[name = string("concat_153"), val = tensor([0, 1500, 0])]; tensor v_67_internal_tensor_assign_1_stride_0 = const()[name = string("v_67_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_67_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_67_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_67_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_67_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_67_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_67_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_67_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_152, begin_mask = v_67_internal_tensor_assign_1_begin_mask_0, end = concat_153, end_mask = v_67_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_67_internal_tensor_assign_1_squeeze_mask_0, stride = v_67_internal_tensor_assign_1_stride_0, update = v_cache_27_cast_fp16, x = k_7_to_fp16)[name = string("v_67_internal_tensor_assign_1_cast_fp16")]; tensor concat_154x = const()[name = string("concat_154x"), val = tensor([1, -1, 16, 64])]; tensor var_1551_cast_fp16 = reshape(shape = concat_154x, x = linear_52_cast_fp16)[name = string("op_1551_cast_fp16")]; tensor const_146_to_fp16 = const()[name = string("const_146_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_55_cast_fp16 = mul(x = var_1551_cast_fp16, y = const_146_to_fp16)[name = string("q_55_cast_fp16")]; tensor var_1557 = const()[name = string("op_1557"), val = tensor([1, 1500, 16, -1])]; tensor var_1558_cast_fp16 = reshape(shape = var_1557, x = k_67_internal_tensor_assign_1_cast_fp16)[name = string("op_1558_cast_fp16")]; tensor const_147_to_fp16 = const()[name = string("const_147_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_69_cast_fp16 = mul(x = var_1558_cast_fp16, y = const_147_to_fp16)[name = string("k_69_cast_fp16")]; tensor var_1564 = const()[name = string("op_1564"), val = tensor([1, 1500, 16, -1])]; tensor var_1565_cast_fp16 = reshape(shape = var_1564, x = v_67_internal_tensor_assign_1_cast_fp16)[name = string("op_1565_cast_fp16")]; tensor var_1566 = const()[name = string("op_1566"), val = tensor([0, 2, 1, 3])]; bool qk_41_transpose_x_0 = const()[name = string("qk_41_transpose_x_0"), val = bool(false)]; bool qk_41_transpose_y_0 = const()[name = string("qk_41_transpose_y_0"), val = bool(false)]; tensor transpose_219_perm_0 = const()[name = string("transpose_219_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_220_perm_0 = const()[name = string("transpose_220_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_220 = transpose(perm = transpose_220_perm_0, x = k_69_cast_fp16)[name = string("transpose_426")]; tensor transpose_219 = transpose(perm = transpose_219_perm_0, x = q_55_cast_fp16)[name = string("transpose_427")]; tensor qk_41_cast_fp16 = matmul(transpose_x = qk_41_transpose_x_0, transpose_y = qk_41_transpose_y_0, x = transpose_219, y = transpose_220)[name = string("qk_41_cast_fp16")]; tensor var_1570_cast_fp16 = softmax(axis = var_1414, x = qk_41_cast_fp16)[name = string("op_1570_cast_fp16")]; bool var_1572_transpose_x_0 = const()[name = string("op_1572_transpose_x_0"), val = bool(false)]; bool var_1572_transpose_y_0 = const()[name = string("op_1572_transpose_y_0"), val = bool(false)]; tensor v_69_cast_fp16 = transpose(perm = var_1566, x = var_1565_cast_fp16)[name = string("transpose_428")]; tensor var_1572_cast_fp16 = matmul(transpose_x = var_1572_transpose_x_0, transpose_y = var_1572_transpose_y_0, x = var_1570_cast_fp16, y = v_69_cast_fp16)[name = string("op_1572_cast_fp16")]; tensor var_1573 = const()[name = string("op_1573"), val = tensor([0, 2, 1, 3])]; tensor concat_155x = const()[name = string("concat_155x"), val = tensor([1, -1, 1024])]; tensor var_1574_cast_fp16 = transpose(perm = var_1573, x = var_1572_cast_fp16)[name = string("transpose_425")]; tensor x_121_cast_fp16 = reshape(shape = concat_155x, x = var_1574_cast_fp16)[name = string("x_121_cast_fp16")]; tensor var_1578_to_fp16 = const()[name = string("op_1578_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(297481280)))]; tensor var_1579_to_fp16 = const()[name = string("op_1579_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299578496)))]; tensor linear_53_cast_fp16 = linear(bias = var_1579_to_fp16, weight = var_1578_to_fp16, x = x_121_cast_fp16)[name = string("linear_53_cast_fp16")]; tensor x_123_cast_fp16 = add(x = x_117_cast_fp16, y = linear_53_cast_fp16)[name = string("x_123_cast_fp16")]; tensor var_1586_axes_0 = const()[name = string("op_1586_axes_0"), val = tensor([-1])]; tensor blocks_6_mlp_ln_weight_to_fp16 = const()[name = string("blocks_6_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299580608)))]; tensor blocks_6_mlp_ln_bias_to_fp16 = const()[name = string("blocks_6_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299582720)))]; tensor var_1586_cast_fp16 = layer_norm(axes = var_1586_axes_0, beta = blocks_6_mlp_ln_bias_to_fp16, epsilon = var_1420_to_fp16, gamma = blocks_6_mlp_ln_weight_to_fp16, x = x_123_cast_fp16)[name = string("op_1586_cast_fp16")]; tensor var_1595_to_fp16 = const()[name = string("op_1595_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299584832)))]; tensor var_1596_to_fp16 = const()[name = string("op_1596_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307973504)))]; tensor linear_54_cast_fp16 = linear(bias = var_1596_to_fp16, weight = var_1595_to_fp16, x = var_1586_cast_fp16)[name = string("linear_54_cast_fp16")]; string x_127_mode_0 = const()[name = string("x_127_mode_0"), val = string("EXACT")]; tensor x_127_cast_fp16 = gelu(mode = x_127_mode_0, x = linear_54_cast_fp16)[name = string("x_127_cast_fp16")]; tensor var_1601_to_fp16 = const()[name = string("op_1601_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307981760)))]; tensor var_1602_to_fp16 = const()[name = string("op_1602_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316370432)))]; tensor linear_55_cast_fp16 = linear(bias = var_1602_to_fp16, weight = var_1601_to_fp16, x = x_127_cast_fp16)[name = string("linear_55_cast_fp16")]; tensor x_129_cast_fp16 = add(x = x_123_cast_fp16, y = linear_55_cast_fp16)[name = string("x_129_cast_fp16")]; tensor k_cache_29_begin_0 = const()[name = string("k_cache_29_begin_0"), val = tensor([7, 0, 0, 0])]; tensor k_cache_29_end_0 = const()[name = string("k_cache_29_end_0"), val = tensor([8, 1, 448, 1024])]; tensor k_cache_29_end_mask_0 = const()[name = string("k_cache_29_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_29_squeeze_mask_0 = const()[name = string("k_cache_29_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_29_cast_fp16 = slice_by_index(begin = k_cache_29_begin_0, end = k_cache_29_end_0, end_mask = k_cache_29_end_mask_0, squeeze_mask = k_cache_29_squeeze_mask_0, x = coreml_update_state_60)[name = string("k_cache_29_cast_fp16")]; tensor v_cache_29_begin_0 = const()[name = string("v_cache_29_begin_0"), val = tensor([7, 0, 0, 0])]; tensor v_cache_29_end_0 = const()[name = string("v_cache_29_end_0"), val = tensor([8, 1, 448, 1024])]; tensor v_cache_29_end_mask_0 = const()[name = string("v_cache_29_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_29_squeeze_mask_0 = const()[name = string("v_cache_29_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_29_cast_fp16 = slice_by_index(begin = v_cache_29_begin_0, end = v_cache_29_end_0, end_mask = v_cache_29_end_mask_0, squeeze_mask = v_cache_29_squeeze_mask_0, x = coreml_update_state_61)[name = string("v_cache_29_cast_fp16")]; tensor k_cache_31_begin_0 = const()[name = string("k_cache_31_begin_0"), val = tensor([7, 0, 0, 0])]; tensor k_cache_31_end_0 = const()[name = string("k_cache_31_end_0"), val = tensor([8, 1, 1500, 1024])]; tensor k_cache_31_end_mask_0 = const()[name = string("k_cache_31_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_31_squeeze_mask_0 = const()[name = string("k_cache_31_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_31_cast_fp16 = slice_by_index(begin = k_cache_31_begin_0, end = k_cache_31_end_0, end_mask = k_cache_31_end_mask_0, squeeze_mask = k_cache_31_squeeze_mask_0, x = read_state_2)[name = string("k_cache_31_cast_fp16")]; tensor v_cache_31_begin_0 = const()[name = string("v_cache_31_begin_0"), val = tensor([7, 0, 0, 0])]; tensor v_cache_31_end_0 = const()[name = string("v_cache_31_end_0"), val = tensor([8, 1, 1500, 1024])]; tensor v_cache_31_end_mask_0 = const()[name = string("v_cache_31_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_31_squeeze_mask_0 = const()[name = string("v_cache_31_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_31_cast_fp16 = slice_by_index(begin = v_cache_31_begin_0, end = v_cache_31_end_0, end_mask = v_cache_31_end_mask_0, squeeze_mask = v_cache_31_squeeze_mask_0, x = read_state_3)[name = string("v_cache_31_cast_fp16")]; int32 var_1625 = const()[name = string("op_1625"), val = int32(-1)]; tensor var_1643_axes_0 = const()[name = string("op_1643_axes_0"), val = tensor([-1])]; tensor blocks_7_attn_ln_weight_to_fp16 = const()[name = string("blocks_7_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316372544)))]; tensor blocks_7_attn_ln_bias_to_fp16 = const()[name = string("blocks_7_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316374656)))]; fp16 var_1631_to_fp16 = const()[name = string("op_1631_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_1643_cast_fp16 = layer_norm(axes = var_1643_axes_0, beta = blocks_7_attn_ln_bias_to_fp16, epsilon = var_1631_to_fp16, gamma = blocks_7_attn_ln_weight_to_fp16, x = x_129_cast_fp16)[name = string("op_1643_cast_fp16")]; tensor var_1654_to_fp16 = const()[name = string("op_1654_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316376768)))]; tensor var_1655_to_fp16 = const()[name = string("op_1655_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318473984)))]; tensor linear_56_cast_fp16 = linear(bias = var_1655_to_fp16, weight = var_1654_to_fp16, x = var_1643_cast_fp16)[name = string("linear_56_cast_fp16")]; tensor var_1658_to_fp16 = const()[name = string("op_1658_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318476096)))]; tensor linear_57_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1658_to_fp16, x = var_1643_cast_fp16)[name = string("linear_57_cast_fp16")]; tensor var_1662_to_fp16 = const()[name = string("op_1662_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(320573312)))]; tensor var_1663_to_fp16 = const()[name = string("op_1663_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322670528)))]; tensor linear_58_cast_fp16 = linear(bias = var_1663_to_fp16, weight = var_1662_to_fp16, x = var_1643_cast_fp16)[name = string("linear_58_cast_fp16")]; tensor var_1665_shape_cast_fp16 = shape(x = linear_56_cast_fp16)[name = string("op_1665_shape_cast_fp16")]; int32 gather_86_axis_0 = const()[name = string("gather_86_axis_0"), val = int32(0)]; int32 gather_86_batch_dims_0 = const()[name = string("gather_86_batch_dims_0"), val = int32(0)]; bool gather_86_validate_indices_0 = const()[name = string("gather_86_validate_indices_0"), val = bool(false)]; string var_1665_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1665_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_86_to_uint16 = const()[name = string("select_86_to_uint16"), val = uint16(1)]; tensor var_1665_shape_cast_fp16_to_uint16 = cast(dtype = var_1665_shape_cast_fp16_to_uint16_dtype_0, x = var_1665_shape_cast_fp16)[name = string("cast_280")]; uint16 gather_86_cast_uint16 = gather(axis = gather_86_axis_0, batch_dims = gather_86_batch_dims_0, indices = select_86_to_uint16, validate_indices = gather_86_validate_indices_0, x = var_1665_shape_cast_fp16_to_uint16)[name = string("gather_86_cast_uint16")]; string gather_86_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_86_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_86_cast_uint16_to_int32 = cast(dtype = gather_86_cast_uint16_to_int32_dtype_0, x = gather_86_cast_uint16)[name = string("cast_279")]; int32 end_step_17 = add(x = offset, y = gather_86_cast_uint16_to_int32)[name = string("end_step_17")]; tensor expand_dims_112 = const()[name = string("expand_dims_112"), val = tensor([0])]; tensor expand_dims_114 = const()[name = string("expand_dims_114"), val = tensor([0])]; tensor expand_dims_115_axes_0 = const()[name = string("expand_dims_115_axes_0"), val = tensor([0])]; tensor expand_dims_115 = expand_dims(axes = expand_dims_115_axes_0, x = end_step_17)[name = string("expand_dims_115")]; tensor concat_158_values0_0 = const()[name = string("concat_158_values0_0"), val = tensor([7])]; int32 concat_158_axis_0 = const()[name = string("concat_158_axis_0"), val = int32(0)]; bool concat_158_interleave_0 = const()[name = string("concat_158_interleave_0"), val = bool(false)]; tensor concat_158 = concat(axis = concat_158_axis_0, interleave = concat_158_interleave_0, values = (concat_158_values0_0, expand_dims_112, expand_dims_1, expand_dims_114))[name = string("concat_158")]; tensor concat_159_values0_0 = const()[name = string("concat_159_values0_0"), val = tensor([0])]; tensor concat_159_values1_0 = const()[name = string("concat_159_values1_0"), val = tensor([0])]; tensor concat_159_values3_0 = const()[name = string("concat_159_values3_0"), val = tensor([0])]; int32 concat_159_axis_0 = const()[name = string("concat_159_axis_0"), val = int32(0)]; bool concat_159_interleave_0 = const()[name = string("concat_159_interleave_0"), val = bool(false)]; tensor concat_159 = concat(axis = concat_159_axis_0, interleave = concat_159_interleave_0, values = (concat_159_values0_0, concat_159_values1_0, expand_dims_115, concat_159_values3_0))[name = string("concat_159")]; tensor k_cache1_internal_tensor_assign_8_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_8_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_8_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_158, begin_mask = k_cache1_internal_tensor_assign_8_begin_mask_0, end = concat_159, end_mask = k_cache1_internal_tensor_assign_8_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_8_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_8_stride_0, update = linear_57_cast_fp16, x = coreml_update_state_60)[name = string("k_cache1_internal_tensor_assign_8_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_8_cast_fp16, input = k_cache1)[name = string("coreml_update_state_62_write_state")]; tensor coreml_update_state_62 = read_state(input = k_cache1)[name = string("coreml_update_state_62")]; tensor v_cache1_internal_tensor_assign_8_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_8_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_8_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_158, begin_mask = v_cache1_internal_tensor_assign_8_begin_mask_0, end = concat_159, end_mask = v_cache1_internal_tensor_assign_8_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_8_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_8_stride_0, update = linear_58_cast_fp16, x = coreml_update_state_61)[name = string("v_cache1_internal_tensor_assign_8_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_8_cast_fp16, input = v_cache1)[name = string("coreml_update_state_63_write_state")]; tensor coreml_update_state_63 = read_state(input = v_cache1)[name = string("coreml_update_state_63")]; int32 concat_164_values0_0 = const()[name = string("concat_164_values0_0"), val = int32(1)]; int32 concat_164_values2_0 = const()[name = string("concat_164_values2_0"), val = int32(1024)]; int32 concat_164_axis_0 = const()[name = string("concat_164_axis_0"), val = int32(0)]; bool concat_164_interleave_0 = const()[name = string("concat_164_interleave_0"), val = bool(false)]; tensor concat_164 = concat(axis = concat_164_axis_0, interleave = concat_164_interleave_0, values = (concat_164_values0_0, end_step_17, concat_164_values2_0))[name = string("concat_164")]; tensor var_1681_begin_0 = const()[name = string("op_1681_begin_0"), val = tensor([0, 0, 0])]; tensor var_1681_end_mask_0 = const()[name = string("op_1681_end_mask_0"), val = tensor([true, false, true])]; tensor var_1681_cast_fp16 = slice_by_index(begin = var_1681_begin_0, end = concat_164, end_mask = var_1681_end_mask_0, x = k_cache_29_cast_fp16)[name = string("op_1681_cast_fp16")]; tensor var_1684_begin_0 = const()[name = string("op_1684_begin_0"), val = tensor([0, 0, 0])]; tensor var_1684_end_mask_0 = const()[name = string("op_1684_end_mask_0"), val = tensor([true, false, true])]; tensor var_1684_cast_fp16 = slice_by_index(begin = var_1684_begin_0, end = concat_164, end_mask = var_1684_end_mask_0, x = v_cache_29_cast_fp16)[name = string("op_1684_cast_fp16")]; tensor concat_166x = const()[name = string("concat_166x"), val = tensor([1, -1, 16, 64])]; tensor var_1694_cast_fp16 = reshape(shape = concat_166x, x = linear_56_cast_fp16)[name = string("op_1694_cast_fp16")]; tensor const_148_to_fp16 = const()[name = string("const_148_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_59_cast_fp16 = mul(x = var_1694_cast_fp16, y = const_148_to_fp16)[name = string("q_59_cast_fp16")]; tensor concat_167x = const()[name = string("concat_167x"), val = tensor([1, -1, 16, 64])]; tensor var_1701_cast_fp16 = reshape(shape = concat_167x, x = var_1681_cast_fp16)[name = string("op_1701_cast_fp16")]; tensor const_149_to_fp16 = const()[name = string("const_149_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_75_cast_fp16 = mul(x = var_1701_cast_fp16, y = const_149_to_fp16)[name = string("k_75_cast_fp16")]; tensor concat_168x = const()[name = string("concat_168x"), val = tensor([1, -1, 16, 64])]; tensor var_1708_cast_fp16 = reshape(shape = concat_168x, x = var_1684_cast_fp16)[name = string("op_1708_cast_fp16")]; tensor var_1709 = const()[name = string("op_1709"), val = tensor([0, 2, 1, 3])]; bool qk_43_transpose_x_0 = const()[name = string("qk_43_transpose_x_0"), val = bool(false)]; bool qk_43_transpose_y_0 = const()[name = string("qk_43_transpose_y_0"), val = bool(false)]; tensor transpose_221_perm_0 = const()[name = string("transpose_221_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_222_perm_0 = const()[name = string("transpose_222_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_222 = transpose(perm = transpose_222_perm_0, x = k_75_cast_fp16)[name = string("transpose_422")]; tensor transpose_221 = transpose(perm = transpose_221_perm_0, x = q_59_cast_fp16)[name = string("transpose_423")]; tensor qk_43_cast_fp16 = matmul(transpose_x = qk_43_transpose_x_0, transpose_y = qk_43_transpose_y_0, x = transpose_221, y = transpose_222)[name = string("qk_43_cast_fp16")]; int32 concat_169_values1_0 = const()[name = string("concat_169_values1_0"), val = int32(448)]; int32 concat_169_axis_0 = const()[name = string("concat_169_axis_0"), val = int32(0)]; bool concat_169_interleave_0 = const()[name = string("concat_169_interleave_0"), val = bool(false)]; tensor concat_169 = concat(axis = concat_169_axis_0, interleave = concat_169_interleave_0, values = (gather_86_cast_uint16_to_int32, concat_169_values1_0))[name = string("concat_169")]; tensor var_1712_begin_0 = const()[name = string("op_1712_begin_0"), val = tensor([0, 0])]; tensor var_1712_end_mask_0 = const()[name = string("op_1712_end_mask_0"), val = tensor([false, true])]; tensor var_1712_cast_fp16 = slice_by_index(begin = var_1712_begin_0, end = concat_169, end_mask = var_1712_end_mask_0, x = mask_to_fp16)[name = string("op_1712_cast_fp16")]; int32 concat_170_values0_0 = const()[name = string("concat_170_values0_0"), val = int32(0)]; int32 concat_170_axis_0 = const()[name = string("concat_170_axis_0"), val = int32(0)]; bool concat_170_interleave_0 = const()[name = string("concat_170_interleave_0"), val = bool(false)]; tensor concat_170 = concat(axis = concat_170_axis_0, interleave = concat_170_interleave_0, values = (concat_170_values0_0, gather_86_cast_uint16_to_int32))[name = string("concat_170")]; tensor var_1713_begin_0 = const()[name = string("op_1713_begin_0"), val = tensor([0, 0])]; tensor var_1713_end_mask_0 = const()[name = string("op_1713_end_mask_0"), val = tensor([true, false])]; tensor var_1713_cast_fp16 = slice_by_index(begin = var_1713_begin_0, end = concat_170, end_mask = var_1713_end_mask_0, x = var_1712_cast_fp16)[name = string("op_1713_cast_fp16")]; tensor qk_45_cast_fp16 = add(x = qk_43_cast_fp16, y = var_1713_cast_fp16)[name = string("qk_45_cast_fp16")]; tensor var_1716_cast_fp16 = softmax(axis = var_1625, x = qk_45_cast_fp16)[name = string("op_1716_cast_fp16")]; bool var_1718_transpose_x_0 = const()[name = string("op_1718_transpose_x_0"), val = bool(false)]; bool var_1718_transpose_y_0 = const()[name = string("op_1718_transpose_y_0"), val = bool(false)]; tensor v_75_cast_fp16 = transpose(perm = var_1709, x = var_1708_cast_fp16)[name = string("transpose_424")]; tensor var_1718_cast_fp16 = matmul(transpose_x = var_1718_transpose_x_0, transpose_y = var_1718_transpose_y_0, x = var_1716_cast_fp16, y = v_75_cast_fp16)[name = string("op_1718_cast_fp16")]; tensor var_1719 = const()[name = string("op_1719"), val = tensor([0, 2, 1, 3])]; tensor concat_171x = const()[name = string("concat_171x"), val = tensor([1, -1, 1024])]; tensor var_1720_cast_fp16 = transpose(perm = var_1719, x = var_1718_cast_fp16)[name = string("transpose_421")]; tensor x_133_cast_fp16 = reshape(shape = concat_171x, x = var_1720_cast_fp16)[name = string("x_133_cast_fp16")]; tensor var_1724_to_fp16 = const()[name = string("op_1724_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322672640)))]; tensor var_1725_to_fp16 = const()[name = string("op_1725_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(324769856)))]; tensor linear_59_cast_fp16 = linear(bias = var_1725_to_fp16, weight = var_1724_to_fp16, x = x_133_cast_fp16)[name = string("linear_59_cast_fp16")]; tensor x_135_cast_fp16 = add(x = x_129_cast_fp16, y = linear_59_cast_fp16)[name = string("x_135_cast_fp16")]; tensor var_1732_axes_0 = const()[name = string("op_1732_axes_0"), val = tensor([-1])]; tensor blocks_7_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_7_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(324771968)))]; tensor blocks_7_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_7_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(324774080)))]; tensor var_1732_cast_fp16 = layer_norm(axes = var_1732_axes_0, beta = blocks_7_cross_attn_ln_bias_to_fp16, epsilon = var_1631_to_fp16, gamma = blocks_7_cross_attn_ln_weight_to_fp16, x = x_135_cast_fp16)[name = string("op_1732_cast_fp16")]; tensor var_1741_to_fp16 = const()[name = string("op_1741_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(324776192)))]; tensor var_1742_to_fp16 = const()[name = string("op_1742_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(326873408)))]; tensor linear_60_cast_fp16 = linear(bias = var_1742_to_fp16, weight = var_1741_to_fp16, x = var_1732_cast_fp16)[name = string("linear_60_cast_fp16")]; tensor concat_172 = const()[name = string("concat_172"), val = tensor([0, 0, 0])]; tensor concat_173 = const()[name = string("concat_173"), val = tensor([0, 1500, 0])]; tensor k_77_internal_tensor_assign_1_stride_0 = const()[name = string("k_77_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_77_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_77_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_77_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_77_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_77_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_77_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_77_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_172, begin_mask = k_77_internal_tensor_assign_1_begin_mask_0, end = concat_173, end_mask = k_77_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_77_internal_tensor_assign_1_squeeze_mask_0, stride = k_77_internal_tensor_assign_1_stride_0, update = k_cache_31_cast_fp16, x = k_7_to_fp16)[name = string("k_77_internal_tensor_assign_1_cast_fp16")]; tensor concat_174 = const()[name = string("concat_174"), val = tensor([0, 0, 0])]; tensor concat_175 = const()[name = string("concat_175"), val = tensor([0, 1500, 0])]; tensor v_77_internal_tensor_assign_1_stride_0 = const()[name = string("v_77_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_77_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_77_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_77_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_77_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_77_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_77_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_77_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_174, begin_mask = v_77_internal_tensor_assign_1_begin_mask_0, end = concat_175, end_mask = v_77_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_77_internal_tensor_assign_1_squeeze_mask_0, stride = v_77_internal_tensor_assign_1_stride_0, update = v_cache_31_cast_fp16, x = k_7_to_fp16)[name = string("v_77_internal_tensor_assign_1_cast_fp16")]; tensor concat_176x = const()[name = string("concat_176x"), val = tensor([1, -1, 16, 64])]; tensor var_1762_cast_fp16 = reshape(shape = concat_176x, x = linear_60_cast_fp16)[name = string("op_1762_cast_fp16")]; tensor const_150_to_fp16 = const()[name = string("const_150_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_63_cast_fp16 = mul(x = var_1762_cast_fp16, y = const_150_to_fp16)[name = string("q_63_cast_fp16")]; tensor var_1768 = const()[name = string("op_1768"), val = tensor([1, 1500, 16, -1])]; tensor var_1769_cast_fp16 = reshape(shape = var_1768, x = k_77_internal_tensor_assign_1_cast_fp16)[name = string("op_1769_cast_fp16")]; tensor const_151_to_fp16 = const()[name = string("const_151_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_79_cast_fp16 = mul(x = var_1769_cast_fp16, y = const_151_to_fp16)[name = string("k_79_cast_fp16")]; tensor var_1775 = const()[name = string("op_1775"), val = tensor([1, 1500, 16, -1])]; tensor var_1776_cast_fp16 = reshape(shape = var_1775, x = v_77_internal_tensor_assign_1_cast_fp16)[name = string("op_1776_cast_fp16")]; tensor var_1777 = const()[name = string("op_1777"), val = tensor([0, 2, 1, 3])]; bool qk_47_transpose_x_0 = const()[name = string("qk_47_transpose_x_0"), val = bool(false)]; bool qk_47_transpose_y_0 = const()[name = string("qk_47_transpose_y_0"), val = bool(false)]; tensor transpose_223_perm_0 = const()[name = string("transpose_223_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_224_perm_0 = const()[name = string("transpose_224_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_224 = transpose(perm = transpose_224_perm_0, x = k_79_cast_fp16)[name = string("transpose_418")]; tensor transpose_223 = transpose(perm = transpose_223_perm_0, x = q_63_cast_fp16)[name = string("transpose_419")]; tensor qk_47_cast_fp16 = matmul(transpose_x = qk_47_transpose_x_0, transpose_y = qk_47_transpose_y_0, x = transpose_223, y = transpose_224)[name = string("qk_47_cast_fp16")]; tensor var_1781_cast_fp16 = softmax(axis = var_1625, x = qk_47_cast_fp16)[name = string("op_1781_cast_fp16")]; bool var_1783_transpose_x_0 = const()[name = string("op_1783_transpose_x_0"), val = bool(false)]; bool var_1783_transpose_y_0 = const()[name = string("op_1783_transpose_y_0"), val = bool(false)]; tensor v_79_cast_fp16 = transpose(perm = var_1777, x = var_1776_cast_fp16)[name = string("transpose_420")]; tensor var_1783_cast_fp16 = matmul(transpose_x = var_1783_transpose_x_0, transpose_y = var_1783_transpose_y_0, x = var_1781_cast_fp16, y = v_79_cast_fp16)[name = string("op_1783_cast_fp16")]; tensor var_1784 = const()[name = string("op_1784"), val = tensor([0, 2, 1, 3])]; tensor concat_177x = const()[name = string("concat_177x"), val = tensor([1, -1, 1024])]; tensor var_1785_cast_fp16 = transpose(perm = var_1784, x = var_1783_cast_fp16)[name = string("transpose_417")]; tensor x_139_cast_fp16 = reshape(shape = concat_177x, x = var_1785_cast_fp16)[name = string("x_139_cast_fp16")]; tensor var_1789_to_fp16 = const()[name = string("op_1789_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(326875520)))]; tensor var_1790_to_fp16 = const()[name = string("op_1790_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(328972736)))]; tensor linear_61_cast_fp16 = linear(bias = var_1790_to_fp16, weight = var_1789_to_fp16, x = x_139_cast_fp16)[name = string("linear_61_cast_fp16")]; tensor x_141_cast_fp16 = add(x = x_135_cast_fp16, y = linear_61_cast_fp16)[name = string("x_141_cast_fp16")]; tensor var_1797_axes_0 = const()[name = string("op_1797_axes_0"), val = tensor([-1])]; tensor blocks_7_mlp_ln_weight_to_fp16 = const()[name = string("blocks_7_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(328974848)))]; tensor blocks_7_mlp_ln_bias_to_fp16 = const()[name = string("blocks_7_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(328976960)))]; tensor var_1797_cast_fp16 = layer_norm(axes = var_1797_axes_0, beta = blocks_7_mlp_ln_bias_to_fp16, epsilon = var_1631_to_fp16, gamma = blocks_7_mlp_ln_weight_to_fp16, x = x_141_cast_fp16)[name = string("op_1797_cast_fp16")]; tensor var_1806_to_fp16 = const()[name = string("op_1806_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(328979072)))]; tensor var_1807_to_fp16 = const()[name = string("op_1807_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337367744)))]; tensor linear_62_cast_fp16 = linear(bias = var_1807_to_fp16, weight = var_1806_to_fp16, x = var_1797_cast_fp16)[name = string("linear_62_cast_fp16")]; string x_145_mode_0 = const()[name = string("x_145_mode_0"), val = string("EXACT")]; tensor x_145_cast_fp16 = gelu(mode = x_145_mode_0, x = linear_62_cast_fp16)[name = string("x_145_cast_fp16")]; tensor var_1812_to_fp16 = const()[name = string("op_1812_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337376000)))]; tensor var_1813_to_fp16 = const()[name = string("op_1813_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345764672)))]; tensor linear_63_cast_fp16 = linear(bias = var_1813_to_fp16, weight = var_1812_to_fp16, x = x_145_cast_fp16)[name = string("linear_63_cast_fp16")]; tensor x_147_cast_fp16 = add(x = x_141_cast_fp16, y = linear_63_cast_fp16)[name = string("x_147_cast_fp16")]; tensor k_cache_33_begin_0 = const()[name = string("k_cache_33_begin_0"), val = tensor([8, 0, 0, 0])]; tensor k_cache_33_end_0 = const()[name = string("k_cache_33_end_0"), val = tensor([9, 1, 448, 1024])]; tensor k_cache_33_end_mask_0 = const()[name = string("k_cache_33_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_33_squeeze_mask_0 = const()[name = string("k_cache_33_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_33_cast_fp16 = slice_by_index(begin = k_cache_33_begin_0, end = k_cache_33_end_0, end_mask = k_cache_33_end_mask_0, squeeze_mask = k_cache_33_squeeze_mask_0, x = coreml_update_state_62)[name = string("k_cache_33_cast_fp16")]; tensor v_cache_33_begin_0 = const()[name = string("v_cache_33_begin_0"), val = tensor([8, 0, 0, 0])]; tensor v_cache_33_end_0 = const()[name = string("v_cache_33_end_0"), val = tensor([9, 1, 448, 1024])]; tensor v_cache_33_end_mask_0 = const()[name = string("v_cache_33_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_33_squeeze_mask_0 = const()[name = string("v_cache_33_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_33_cast_fp16 = slice_by_index(begin = v_cache_33_begin_0, end = v_cache_33_end_0, end_mask = v_cache_33_end_mask_0, squeeze_mask = v_cache_33_squeeze_mask_0, x = coreml_update_state_63)[name = string("v_cache_33_cast_fp16")]; tensor k_cache_35_begin_0 = const()[name = string("k_cache_35_begin_0"), val = tensor([8, 0, 0, 0])]; tensor k_cache_35_end_0 = const()[name = string("k_cache_35_end_0"), val = tensor([9, 1, 1500, 1024])]; tensor k_cache_35_end_mask_0 = const()[name = string("k_cache_35_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_35_squeeze_mask_0 = const()[name = string("k_cache_35_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_35_cast_fp16 = slice_by_index(begin = k_cache_35_begin_0, end = k_cache_35_end_0, end_mask = k_cache_35_end_mask_0, squeeze_mask = k_cache_35_squeeze_mask_0, x = read_state_2)[name = string("k_cache_35_cast_fp16")]; tensor v_cache_35_begin_0 = const()[name = string("v_cache_35_begin_0"), val = tensor([8, 0, 0, 0])]; tensor v_cache_35_end_0 = const()[name = string("v_cache_35_end_0"), val = tensor([9, 1, 1500, 1024])]; tensor v_cache_35_end_mask_0 = const()[name = string("v_cache_35_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_35_squeeze_mask_0 = const()[name = string("v_cache_35_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_35_cast_fp16 = slice_by_index(begin = v_cache_35_begin_0, end = v_cache_35_end_0, end_mask = v_cache_35_end_mask_0, squeeze_mask = v_cache_35_squeeze_mask_0, x = read_state_3)[name = string("v_cache_35_cast_fp16")]; int32 var_1836 = const()[name = string("op_1836"), val = int32(-1)]; tensor var_1854_axes_0 = const()[name = string("op_1854_axes_0"), val = tensor([-1])]; tensor blocks_8_attn_ln_weight_to_fp16 = const()[name = string("blocks_8_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345766784)))]; tensor blocks_8_attn_ln_bias_to_fp16 = const()[name = string("blocks_8_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345768896)))]; fp16 var_1842_to_fp16 = const()[name = string("op_1842_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_1854_cast_fp16 = layer_norm(axes = var_1854_axes_0, beta = blocks_8_attn_ln_bias_to_fp16, epsilon = var_1842_to_fp16, gamma = blocks_8_attn_ln_weight_to_fp16, x = x_147_cast_fp16)[name = string("op_1854_cast_fp16")]; tensor var_1865_to_fp16 = const()[name = string("op_1865_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345771008)))]; tensor var_1866_to_fp16 = const()[name = string("op_1866_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(347868224)))]; tensor linear_64_cast_fp16 = linear(bias = var_1866_to_fp16, weight = var_1865_to_fp16, x = var_1854_cast_fp16)[name = string("linear_64_cast_fp16")]; tensor var_1869_to_fp16 = const()[name = string("op_1869_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(347870336)))]; tensor linear_65_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1869_to_fp16, x = var_1854_cast_fp16)[name = string("linear_65_cast_fp16")]; tensor var_1873_to_fp16 = const()[name = string("op_1873_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(349967552)))]; tensor var_1874_to_fp16 = const()[name = string("op_1874_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352064768)))]; tensor linear_66_cast_fp16 = linear(bias = var_1874_to_fp16, weight = var_1873_to_fp16, x = var_1854_cast_fp16)[name = string("linear_66_cast_fp16")]; tensor var_1876_shape_cast_fp16 = shape(x = linear_64_cast_fp16)[name = string("op_1876_shape_cast_fp16")]; int32 gather_98_axis_0 = const()[name = string("gather_98_axis_0"), val = int32(0)]; int32 gather_98_batch_dims_0 = const()[name = string("gather_98_batch_dims_0"), val = int32(0)]; bool gather_98_validate_indices_0 = const()[name = string("gather_98_validate_indices_0"), val = bool(false)]; string var_1876_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1876_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_98_to_uint16 = const()[name = string("select_98_to_uint16"), val = uint16(1)]; tensor var_1876_shape_cast_fp16_to_uint16 = cast(dtype = var_1876_shape_cast_fp16_to_uint16_dtype_0, x = var_1876_shape_cast_fp16)[name = string("cast_278")]; uint16 gather_98_cast_uint16 = gather(axis = gather_98_axis_0, batch_dims = gather_98_batch_dims_0, indices = select_98_to_uint16, validate_indices = gather_98_validate_indices_0, x = var_1876_shape_cast_fp16_to_uint16)[name = string("gather_98_cast_uint16")]; string gather_98_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_98_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_98_cast_uint16_to_int32 = cast(dtype = gather_98_cast_uint16_to_int32_dtype_0, x = gather_98_cast_uint16)[name = string("cast_277")]; int32 end_step_19 = add(x = offset, y = gather_98_cast_uint16_to_int32)[name = string("end_step_19")]; tensor expand_dims_128 = const()[name = string("expand_dims_128"), val = tensor([0])]; tensor expand_dims_130 = const()[name = string("expand_dims_130"), val = tensor([0])]; tensor expand_dims_131_axes_0 = const()[name = string("expand_dims_131_axes_0"), val = tensor([0])]; tensor expand_dims_131 = expand_dims(axes = expand_dims_131_axes_0, x = end_step_19)[name = string("expand_dims_131")]; tensor concat_180_values0_0 = const()[name = string("concat_180_values0_0"), val = tensor([8])]; int32 concat_180_axis_0 = const()[name = string("concat_180_axis_0"), val = int32(0)]; bool concat_180_interleave_0 = const()[name = string("concat_180_interleave_0"), val = bool(false)]; tensor concat_180 = concat(axis = concat_180_axis_0, interleave = concat_180_interleave_0, values = (concat_180_values0_0, expand_dims_128, expand_dims_1, expand_dims_130))[name = string("concat_180")]; tensor concat_181_values0_0 = const()[name = string("concat_181_values0_0"), val = tensor([0])]; tensor concat_181_values1_0 = const()[name = string("concat_181_values1_0"), val = tensor([0])]; tensor concat_181_values3_0 = const()[name = string("concat_181_values3_0"), val = tensor([0])]; int32 concat_181_axis_0 = const()[name = string("concat_181_axis_0"), val = int32(0)]; bool concat_181_interleave_0 = const()[name = string("concat_181_interleave_0"), val = bool(false)]; tensor concat_181 = concat(axis = concat_181_axis_0, interleave = concat_181_interleave_0, values = (concat_181_values0_0, concat_181_values1_0, expand_dims_131, concat_181_values3_0))[name = string("concat_181")]; tensor k_cache1_internal_tensor_assign_9_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_9_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_9_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_180, begin_mask = k_cache1_internal_tensor_assign_9_begin_mask_0, end = concat_181, end_mask = k_cache1_internal_tensor_assign_9_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_9_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_9_stride_0, update = linear_65_cast_fp16, x = coreml_update_state_62)[name = string("k_cache1_internal_tensor_assign_9_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_9_cast_fp16, input = k_cache1)[name = string("coreml_update_state_64_write_state")]; tensor coreml_update_state_64 = read_state(input = k_cache1)[name = string("coreml_update_state_64")]; tensor v_cache1_internal_tensor_assign_9_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_9_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_9_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_180, begin_mask = v_cache1_internal_tensor_assign_9_begin_mask_0, end = concat_181, end_mask = v_cache1_internal_tensor_assign_9_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_9_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_9_stride_0, update = linear_66_cast_fp16, x = coreml_update_state_63)[name = string("v_cache1_internal_tensor_assign_9_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_9_cast_fp16, input = v_cache1)[name = string("coreml_update_state_65_write_state")]; tensor coreml_update_state_65 = read_state(input = v_cache1)[name = string("coreml_update_state_65")]; int32 concat_186_values0_0 = const()[name = string("concat_186_values0_0"), val = int32(1)]; int32 concat_186_values2_0 = const()[name = string("concat_186_values2_0"), val = int32(1024)]; int32 concat_186_axis_0 = const()[name = string("concat_186_axis_0"), val = int32(0)]; bool concat_186_interleave_0 = const()[name = string("concat_186_interleave_0"), val = bool(false)]; tensor concat_186 = concat(axis = concat_186_axis_0, interleave = concat_186_interleave_0, values = (concat_186_values0_0, end_step_19, concat_186_values2_0))[name = string("concat_186")]; tensor var_1892_begin_0 = const()[name = string("op_1892_begin_0"), val = tensor([0, 0, 0])]; tensor var_1892_end_mask_0 = const()[name = string("op_1892_end_mask_0"), val = tensor([true, false, true])]; tensor var_1892_cast_fp16 = slice_by_index(begin = var_1892_begin_0, end = concat_186, end_mask = var_1892_end_mask_0, x = k_cache_33_cast_fp16)[name = string("op_1892_cast_fp16")]; tensor var_1895_begin_0 = const()[name = string("op_1895_begin_0"), val = tensor([0, 0, 0])]; tensor var_1895_end_mask_0 = const()[name = string("op_1895_end_mask_0"), val = tensor([true, false, true])]; tensor var_1895_cast_fp16 = slice_by_index(begin = var_1895_begin_0, end = concat_186, end_mask = var_1895_end_mask_0, x = v_cache_33_cast_fp16)[name = string("op_1895_cast_fp16")]; tensor concat_188x = const()[name = string("concat_188x"), val = tensor([1, -1, 16, 64])]; tensor var_1905_cast_fp16 = reshape(shape = concat_188x, x = linear_64_cast_fp16)[name = string("op_1905_cast_fp16")]; tensor const_152_to_fp16 = const()[name = string("const_152_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_67_cast_fp16 = mul(x = var_1905_cast_fp16, y = const_152_to_fp16)[name = string("q_67_cast_fp16")]; tensor concat_189x = const()[name = string("concat_189x"), val = tensor([1, -1, 16, 64])]; tensor var_1912_cast_fp16 = reshape(shape = concat_189x, x = var_1892_cast_fp16)[name = string("op_1912_cast_fp16")]; tensor const_153_to_fp16 = const()[name = string("const_153_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_85_cast_fp16 = mul(x = var_1912_cast_fp16, y = const_153_to_fp16)[name = string("k_85_cast_fp16")]; tensor concat_190x = const()[name = string("concat_190x"), val = tensor([1, -1, 16, 64])]; tensor var_1919_cast_fp16 = reshape(shape = concat_190x, x = var_1895_cast_fp16)[name = string("op_1919_cast_fp16")]; tensor var_1920 = const()[name = string("op_1920"), val = tensor([0, 2, 1, 3])]; bool qk_49_transpose_x_0 = const()[name = string("qk_49_transpose_x_0"), val = bool(false)]; bool qk_49_transpose_y_0 = const()[name = string("qk_49_transpose_y_0"), val = bool(false)]; tensor transpose_225_perm_0 = const()[name = string("transpose_225_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_226_perm_0 = const()[name = string("transpose_226_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_226 = transpose(perm = transpose_226_perm_0, x = k_85_cast_fp16)[name = string("transpose_414")]; tensor transpose_225 = transpose(perm = transpose_225_perm_0, x = q_67_cast_fp16)[name = string("transpose_415")]; tensor qk_49_cast_fp16 = matmul(transpose_x = qk_49_transpose_x_0, transpose_y = qk_49_transpose_y_0, x = transpose_225, y = transpose_226)[name = string("qk_49_cast_fp16")]; int32 concat_191_values1_0 = const()[name = string("concat_191_values1_0"), val = int32(448)]; int32 concat_191_axis_0 = const()[name = string("concat_191_axis_0"), val = int32(0)]; bool concat_191_interleave_0 = const()[name = string("concat_191_interleave_0"), val = bool(false)]; tensor concat_191 = concat(axis = concat_191_axis_0, interleave = concat_191_interleave_0, values = (gather_98_cast_uint16_to_int32, concat_191_values1_0))[name = string("concat_191")]; tensor var_1923_begin_0 = const()[name = string("op_1923_begin_0"), val = tensor([0, 0])]; tensor var_1923_end_mask_0 = const()[name = string("op_1923_end_mask_0"), val = tensor([false, true])]; tensor var_1923_cast_fp16 = slice_by_index(begin = var_1923_begin_0, end = concat_191, end_mask = var_1923_end_mask_0, x = mask_to_fp16)[name = string("op_1923_cast_fp16")]; int32 concat_192_values0_0 = const()[name = string("concat_192_values0_0"), val = int32(0)]; int32 concat_192_axis_0 = const()[name = string("concat_192_axis_0"), val = int32(0)]; bool concat_192_interleave_0 = const()[name = string("concat_192_interleave_0"), val = bool(false)]; tensor concat_192 = concat(axis = concat_192_axis_0, interleave = concat_192_interleave_0, values = (concat_192_values0_0, gather_98_cast_uint16_to_int32))[name = string("concat_192")]; tensor var_1924_begin_0 = const()[name = string("op_1924_begin_0"), val = tensor([0, 0])]; tensor var_1924_end_mask_0 = const()[name = string("op_1924_end_mask_0"), val = tensor([true, false])]; tensor var_1924_cast_fp16 = slice_by_index(begin = var_1924_begin_0, end = concat_192, end_mask = var_1924_end_mask_0, x = var_1923_cast_fp16)[name = string("op_1924_cast_fp16")]; tensor qk_51_cast_fp16 = add(x = qk_49_cast_fp16, y = var_1924_cast_fp16)[name = string("qk_51_cast_fp16")]; tensor var_1927_cast_fp16 = softmax(axis = var_1836, x = qk_51_cast_fp16)[name = string("op_1927_cast_fp16")]; bool var_1929_transpose_x_0 = const()[name = string("op_1929_transpose_x_0"), val = bool(false)]; bool var_1929_transpose_y_0 = const()[name = string("op_1929_transpose_y_0"), val = bool(false)]; tensor v_85_cast_fp16 = transpose(perm = var_1920, x = var_1919_cast_fp16)[name = string("transpose_416")]; tensor var_1929_cast_fp16 = matmul(transpose_x = var_1929_transpose_x_0, transpose_y = var_1929_transpose_y_0, x = var_1927_cast_fp16, y = v_85_cast_fp16)[name = string("op_1929_cast_fp16")]; tensor var_1930 = const()[name = string("op_1930"), val = tensor([0, 2, 1, 3])]; tensor concat_193x = const()[name = string("concat_193x"), val = tensor([1, -1, 1024])]; tensor var_1931_cast_fp16 = transpose(perm = var_1930, x = var_1929_cast_fp16)[name = string("transpose_413")]; tensor x_151_cast_fp16 = reshape(shape = concat_193x, x = var_1931_cast_fp16)[name = string("x_151_cast_fp16")]; tensor var_1935_to_fp16 = const()[name = string("op_1935_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352066880)))]; tensor var_1936_to_fp16 = const()[name = string("op_1936_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354164096)))]; tensor linear_67_cast_fp16 = linear(bias = var_1936_to_fp16, weight = var_1935_to_fp16, x = x_151_cast_fp16)[name = string("linear_67_cast_fp16")]; tensor x_153_cast_fp16 = add(x = x_147_cast_fp16, y = linear_67_cast_fp16)[name = string("x_153_cast_fp16")]; tensor var_1943_axes_0 = const()[name = string("op_1943_axes_0"), val = tensor([-1])]; tensor blocks_8_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_8_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354166208)))]; tensor blocks_8_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_8_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354168320)))]; tensor var_1943_cast_fp16 = layer_norm(axes = var_1943_axes_0, beta = blocks_8_cross_attn_ln_bias_to_fp16, epsilon = var_1842_to_fp16, gamma = blocks_8_cross_attn_ln_weight_to_fp16, x = x_153_cast_fp16)[name = string("op_1943_cast_fp16")]; tensor var_1952_to_fp16 = const()[name = string("op_1952_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354170432)))]; tensor var_1953_to_fp16 = const()[name = string("op_1953_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(356267648)))]; tensor linear_68_cast_fp16 = linear(bias = var_1953_to_fp16, weight = var_1952_to_fp16, x = var_1943_cast_fp16)[name = string("linear_68_cast_fp16")]; tensor concat_194 = const()[name = string("concat_194"), val = tensor([0, 0, 0])]; tensor concat_195 = const()[name = string("concat_195"), val = tensor([0, 1500, 0])]; tensor k_87_internal_tensor_assign_1_stride_0 = const()[name = string("k_87_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_87_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_87_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_87_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_87_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_87_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_87_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_87_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_194, begin_mask = k_87_internal_tensor_assign_1_begin_mask_0, end = concat_195, end_mask = k_87_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_87_internal_tensor_assign_1_squeeze_mask_0, stride = k_87_internal_tensor_assign_1_stride_0, update = k_cache_35_cast_fp16, x = k_7_to_fp16)[name = string("k_87_internal_tensor_assign_1_cast_fp16")]; tensor concat_196 = const()[name = string("concat_196"), val = tensor([0, 0, 0])]; tensor concat_197 = const()[name = string("concat_197"), val = tensor([0, 1500, 0])]; tensor v_87_internal_tensor_assign_1_stride_0 = const()[name = string("v_87_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_87_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_87_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_87_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_87_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_87_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_87_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_87_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_196, begin_mask = v_87_internal_tensor_assign_1_begin_mask_0, end = concat_197, end_mask = v_87_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_87_internal_tensor_assign_1_squeeze_mask_0, stride = v_87_internal_tensor_assign_1_stride_0, update = v_cache_35_cast_fp16, x = k_7_to_fp16)[name = string("v_87_internal_tensor_assign_1_cast_fp16")]; tensor concat_198x = const()[name = string("concat_198x"), val = tensor([1, -1, 16, 64])]; tensor var_1973_cast_fp16 = reshape(shape = concat_198x, x = linear_68_cast_fp16)[name = string("op_1973_cast_fp16")]; tensor const_154_to_fp16 = const()[name = string("const_154_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_71_cast_fp16 = mul(x = var_1973_cast_fp16, y = const_154_to_fp16)[name = string("q_71_cast_fp16")]; tensor var_1979 = const()[name = string("op_1979"), val = tensor([1, 1500, 16, -1])]; tensor var_1980_cast_fp16 = reshape(shape = var_1979, x = k_87_internal_tensor_assign_1_cast_fp16)[name = string("op_1980_cast_fp16")]; tensor const_155_to_fp16 = const()[name = string("const_155_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_89_cast_fp16 = mul(x = var_1980_cast_fp16, y = const_155_to_fp16)[name = string("k_89_cast_fp16")]; tensor var_1986 = const()[name = string("op_1986"), val = tensor([1, 1500, 16, -1])]; tensor var_1987_cast_fp16 = reshape(shape = var_1986, x = v_87_internal_tensor_assign_1_cast_fp16)[name = string("op_1987_cast_fp16")]; tensor var_1988 = const()[name = string("op_1988"), val = tensor([0, 2, 1, 3])]; bool qk_53_transpose_x_0 = const()[name = string("qk_53_transpose_x_0"), val = bool(false)]; bool qk_53_transpose_y_0 = const()[name = string("qk_53_transpose_y_0"), val = bool(false)]; tensor transpose_227_perm_0 = const()[name = string("transpose_227_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_228_perm_0 = const()[name = string("transpose_228_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_228 = transpose(perm = transpose_228_perm_0, x = k_89_cast_fp16)[name = string("transpose_410")]; tensor transpose_227 = transpose(perm = transpose_227_perm_0, x = q_71_cast_fp16)[name = string("transpose_411")]; tensor qk_53_cast_fp16 = matmul(transpose_x = qk_53_transpose_x_0, transpose_y = qk_53_transpose_y_0, x = transpose_227, y = transpose_228)[name = string("qk_53_cast_fp16")]; tensor var_1992_cast_fp16 = softmax(axis = var_1836, x = qk_53_cast_fp16)[name = string("op_1992_cast_fp16")]; bool var_1994_transpose_x_0 = const()[name = string("op_1994_transpose_x_0"), val = bool(false)]; bool var_1994_transpose_y_0 = const()[name = string("op_1994_transpose_y_0"), val = bool(false)]; tensor v_89_cast_fp16 = transpose(perm = var_1988, x = var_1987_cast_fp16)[name = string("transpose_412")]; tensor var_1994_cast_fp16 = matmul(transpose_x = var_1994_transpose_x_0, transpose_y = var_1994_transpose_y_0, x = var_1992_cast_fp16, y = v_89_cast_fp16)[name = string("op_1994_cast_fp16")]; tensor var_1995 = const()[name = string("op_1995"), val = tensor([0, 2, 1, 3])]; tensor concat_199x = const()[name = string("concat_199x"), val = tensor([1, -1, 1024])]; tensor var_1996_cast_fp16 = transpose(perm = var_1995, x = var_1994_cast_fp16)[name = string("transpose_409")]; tensor x_157_cast_fp16 = reshape(shape = concat_199x, x = var_1996_cast_fp16)[name = string("x_157_cast_fp16")]; tensor var_2000_to_fp16 = const()[name = string("op_2000_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(356269760)))]; tensor var_2001_to_fp16 = const()[name = string("op_2001_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358366976)))]; tensor linear_69_cast_fp16 = linear(bias = var_2001_to_fp16, weight = var_2000_to_fp16, x = x_157_cast_fp16)[name = string("linear_69_cast_fp16")]; tensor x_159_cast_fp16 = add(x = x_153_cast_fp16, y = linear_69_cast_fp16)[name = string("x_159_cast_fp16")]; tensor var_2008_axes_0 = const()[name = string("op_2008_axes_0"), val = tensor([-1])]; tensor blocks_8_mlp_ln_weight_to_fp16 = const()[name = string("blocks_8_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358369088)))]; tensor blocks_8_mlp_ln_bias_to_fp16 = const()[name = string("blocks_8_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358371200)))]; tensor var_2008_cast_fp16 = layer_norm(axes = var_2008_axes_0, beta = blocks_8_mlp_ln_bias_to_fp16, epsilon = var_1842_to_fp16, gamma = blocks_8_mlp_ln_weight_to_fp16, x = x_159_cast_fp16)[name = string("op_2008_cast_fp16")]; tensor var_2017_to_fp16 = const()[name = string("op_2017_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358373312)))]; tensor var_2018_to_fp16 = const()[name = string("op_2018_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366761984)))]; tensor linear_70_cast_fp16 = linear(bias = var_2018_to_fp16, weight = var_2017_to_fp16, x = var_2008_cast_fp16)[name = string("linear_70_cast_fp16")]; string x_163_mode_0 = const()[name = string("x_163_mode_0"), val = string("EXACT")]; tensor x_163_cast_fp16 = gelu(mode = x_163_mode_0, x = linear_70_cast_fp16)[name = string("x_163_cast_fp16")]; tensor var_2023_to_fp16 = const()[name = string("op_2023_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366770240)))]; tensor var_2024_to_fp16 = const()[name = string("op_2024_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(375158912)))]; tensor linear_71_cast_fp16 = linear(bias = var_2024_to_fp16, weight = var_2023_to_fp16, x = x_163_cast_fp16)[name = string("linear_71_cast_fp16")]; tensor x_165_cast_fp16 = add(x = x_159_cast_fp16, y = linear_71_cast_fp16)[name = string("x_165_cast_fp16")]; tensor k_cache_37_begin_0 = const()[name = string("k_cache_37_begin_0"), val = tensor([9, 0, 0, 0])]; tensor k_cache_37_end_0 = const()[name = string("k_cache_37_end_0"), val = tensor([10, 1, 448, 1024])]; tensor k_cache_37_end_mask_0 = const()[name = string("k_cache_37_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_37_squeeze_mask_0 = const()[name = string("k_cache_37_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_37_cast_fp16 = slice_by_index(begin = k_cache_37_begin_0, end = k_cache_37_end_0, end_mask = k_cache_37_end_mask_0, squeeze_mask = k_cache_37_squeeze_mask_0, x = coreml_update_state_64)[name = string("k_cache_37_cast_fp16")]; tensor v_cache_37_begin_0 = const()[name = string("v_cache_37_begin_0"), val = tensor([9, 0, 0, 0])]; tensor v_cache_37_end_0 = const()[name = string("v_cache_37_end_0"), val = tensor([10, 1, 448, 1024])]; tensor v_cache_37_end_mask_0 = const()[name = string("v_cache_37_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_37_squeeze_mask_0 = const()[name = string("v_cache_37_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_37_cast_fp16 = slice_by_index(begin = v_cache_37_begin_0, end = v_cache_37_end_0, end_mask = v_cache_37_end_mask_0, squeeze_mask = v_cache_37_squeeze_mask_0, x = coreml_update_state_65)[name = string("v_cache_37_cast_fp16")]; tensor k_cache_39_begin_0 = const()[name = string("k_cache_39_begin_0"), val = tensor([9, 0, 0, 0])]; tensor k_cache_39_end_0 = const()[name = string("k_cache_39_end_0"), val = tensor([10, 1, 1500, 1024])]; tensor k_cache_39_end_mask_0 = const()[name = string("k_cache_39_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_39_squeeze_mask_0 = const()[name = string("k_cache_39_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_39_cast_fp16 = slice_by_index(begin = k_cache_39_begin_0, end = k_cache_39_end_0, end_mask = k_cache_39_end_mask_0, squeeze_mask = k_cache_39_squeeze_mask_0, x = read_state_2)[name = string("k_cache_39_cast_fp16")]; tensor v_cache_39_begin_0 = const()[name = string("v_cache_39_begin_0"), val = tensor([9, 0, 0, 0])]; tensor v_cache_39_end_0 = const()[name = string("v_cache_39_end_0"), val = tensor([10, 1, 1500, 1024])]; tensor v_cache_39_end_mask_0 = const()[name = string("v_cache_39_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_39_squeeze_mask_0 = const()[name = string("v_cache_39_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_39_cast_fp16 = slice_by_index(begin = v_cache_39_begin_0, end = v_cache_39_end_0, end_mask = v_cache_39_end_mask_0, squeeze_mask = v_cache_39_squeeze_mask_0, x = read_state_3)[name = string("v_cache_39_cast_fp16")]; int32 var_2047 = const()[name = string("op_2047"), val = int32(-1)]; tensor var_2065_axes_0 = const()[name = string("op_2065_axes_0"), val = tensor([-1])]; tensor blocks_9_attn_ln_weight_to_fp16 = const()[name = string("blocks_9_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(375161024)))]; tensor blocks_9_attn_ln_bias_to_fp16 = const()[name = string("blocks_9_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(375163136)))]; fp16 var_2053_to_fp16 = const()[name = string("op_2053_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_2065_cast_fp16 = layer_norm(axes = var_2065_axes_0, beta = blocks_9_attn_ln_bias_to_fp16, epsilon = var_2053_to_fp16, gamma = blocks_9_attn_ln_weight_to_fp16, x = x_165_cast_fp16)[name = string("op_2065_cast_fp16")]; tensor var_2076_to_fp16 = const()[name = string("op_2076_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(375165248)))]; tensor var_2077_to_fp16 = const()[name = string("op_2077_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377262464)))]; tensor linear_72_cast_fp16 = linear(bias = var_2077_to_fp16, weight = var_2076_to_fp16, x = var_2065_cast_fp16)[name = string("linear_72_cast_fp16")]; tensor var_2080_to_fp16 = const()[name = string("op_2080_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377264576)))]; tensor linear_73_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2080_to_fp16, x = var_2065_cast_fp16)[name = string("linear_73_cast_fp16")]; tensor var_2084_to_fp16 = const()[name = string("op_2084_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(379361792)))]; tensor var_2085_to_fp16 = const()[name = string("op_2085_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(381459008)))]; tensor linear_74_cast_fp16 = linear(bias = var_2085_to_fp16, weight = var_2084_to_fp16, x = var_2065_cast_fp16)[name = string("linear_74_cast_fp16")]; tensor var_2087_shape_cast_fp16 = shape(x = linear_72_cast_fp16)[name = string("op_2087_shape_cast_fp16")]; int32 gather_110_axis_0 = const()[name = string("gather_110_axis_0"), val = int32(0)]; int32 gather_110_batch_dims_0 = const()[name = string("gather_110_batch_dims_0"), val = int32(0)]; bool gather_110_validate_indices_0 = const()[name = string("gather_110_validate_indices_0"), val = bool(false)]; string var_2087_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2087_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_110_to_uint16 = const()[name = string("select_110_to_uint16"), val = uint16(1)]; tensor var_2087_shape_cast_fp16_to_uint16 = cast(dtype = var_2087_shape_cast_fp16_to_uint16_dtype_0, x = var_2087_shape_cast_fp16)[name = string("cast_276")]; uint16 gather_110_cast_uint16 = gather(axis = gather_110_axis_0, batch_dims = gather_110_batch_dims_0, indices = select_110_to_uint16, validate_indices = gather_110_validate_indices_0, x = var_2087_shape_cast_fp16_to_uint16)[name = string("gather_110_cast_uint16")]; string gather_110_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_110_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_110_cast_uint16_to_int32 = cast(dtype = gather_110_cast_uint16_to_int32_dtype_0, x = gather_110_cast_uint16)[name = string("cast_275")]; int32 end_step_21 = add(x = offset, y = gather_110_cast_uint16_to_int32)[name = string("end_step_21")]; tensor expand_dims_144 = const()[name = string("expand_dims_144"), val = tensor([0])]; tensor expand_dims_146 = const()[name = string("expand_dims_146"), val = tensor([0])]; tensor expand_dims_147_axes_0 = const()[name = string("expand_dims_147_axes_0"), val = tensor([0])]; tensor expand_dims_147 = expand_dims(axes = expand_dims_147_axes_0, x = end_step_21)[name = string("expand_dims_147")]; tensor concat_202_values0_0 = const()[name = string("concat_202_values0_0"), val = tensor([9])]; int32 concat_202_axis_0 = const()[name = string("concat_202_axis_0"), val = int32(0)]; bool concat_202_interleave_0 = const()[name = string("concat_202_interleave_0"), val = bool(false)]; tensor concat_202 = concat(axis = concat_202_axis_0, interleave = concat_202_interleave_0, values = (concat_202_values0_0, expand_dims_144, expand_dims_1, expand_dims_146))[name = string("concat_202")]; tensor concat_203_values0_0 = const()[name = string("concat_203_values0_0"), val = tensor([0])]; tensor concat_203_values1_0 = const()[name = string("concat_203_values1_0"), val = tensor([0])]; tensor concat_203_values3_0 = const()[name = string("concat_203_values3_0"), val = tensor([0])]; int32 concat_203_axis_0 = const()[name = string("concat_203_axis_0"), val = int32(0)]; bool concat_203_interleave_0 = const()[name = string("concat_203_interleave_0"), val = bool(false)]; tensor concat_203 = concat(axis = concat_203_axis_0, interleave = concat_203_interleave_0, values = (concat_203_values0_0, concat_203_values1_0, expand_dims_147, concat_203_values3_0))[name = string("concat_203")]; tensor k_cache1_internal_tensor_assign_10_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_10_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_10_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_202, begin_mask = k_cache1_internal_tensor_assign_10_begin_mask_0, end = concat_203, end_mask = k_cache1_internal_tensor_assign_10_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_10_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_10_stride_0, update = linear_73_cast_fp16, x = coreml_update_state_64)[name = string("k_cache1_internal_tensor_assign_10_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_10_cast_fp16, input = k_cache1)[name = string("coreml_update_state_66_write_state")]; tensor coreml_update_state_66 = read_state(input = k_cache1)[name = string("coreml_update_state_66")]; tensor v_cache1_internal_tensor_assign_10_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_10_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_10_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_202, begin_mask = v_cache1_internal_tensor_assign_10_begin_mask_0, end = concat_203, end_mask = v_cache1_internal_tensor_assign_10_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_10_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_10_stride_0, update = linear_74_cast_fp16, x = coreml_update_state_65)[name = string("v_cache1_internal_tensor_assign_10_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_10_cast_fp16, input = v_cache1)[name = string("coreml_update_state_67_write_state")]; tensor coreml_update_state_67 = read_state(input = v_cache1)[name = string("coreml_update_state_67")]; int32 concat_208_values0_0 = const()[name = string("concat_208_values0_0"), val = int32(1)]; int32 concat_208_values2_0 = const()[name = string("concat_208_values2_0"), val = int32(1024)]; int32 concat_208_axis_0 = const()[name = string("concat_208_axis_0"), val = int32(0)]; bool concat_208_interleave_0 = const()[name = string("concat_208_interleave_0"), val = bool(false)]; tensor concat_208 = concat(axis = concat_208_axis_0, interleave = concat_208_interleave_0, values = (concat_208_values0_0, end_step_21, concat_208_values2_0))[name = string("concat_208")]; tensor var_2103_begin_0 = const()[name = string("op_2103_begin_0"), val = tensor([0, 0, 0])]; tensor var_2103_end_mask_0 = const()[name = string("op_2103_end_mask_0"), val = tensor([true, false, true])]; tensor var_2103_cast_fp16 = slice_by_index(begin = var_2103_begin_0, end = concat_208, end_mask = var_2103_end_mask_0, x = k_cache_37_cast_fp16)[name = string("op_2103_cast_fp16")]; tensor var_2106_begin_0 = const()[name = string("op_2106_begin_0"), val = tensor([0, 0, 0])]; tensor var_2106_end_mask_0 = const()[name = string("op_2106_end_mask_0"), val = tensor([true, false, true])]; tensor var_2106_cast_fp16 = slice_by_index(begin = var_2106_begin_0, end = concat_208, end_mask = var_2106_end_mask_0, x = v_cache_37_cast_fp16)[name = string("op_2106_cast_fp16")]; tensor concat_210x = const()[name = string("concat_210x"), val = tensor([1, -1, 16, 64])]; tensor var_2116_cast_fp16 = reshape(shape = concat_210x, x = linear_72_cast_fp16)[name = string("op_2116_cast_fp16")]; tensor const_156_to_fp16 = const()[name = string("const_156_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_75_cast_fp16 = mul(x = var_2116_cast_fp16, y = const_156_to_fp16)[name = string("q_75_cast_fp16")]; tensor concat_211x = const()[name = string("concat_211x"), val = tensor([1, -1, 16, 64])]; tensor var_2123_cast_fp16 = reshape(shape = concat_211x, x = var_2103_cast_fp16)[name = string("op_2123_cast_fp16")]; tensor const_157_to_fp16 = const()[name = string("const_157_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_95_cast_fp16 = mul(x = var_2123_cast_fp16, y = const_157_to_fp16)[name = string("k_95_cast_fp16")]; tensor concat_212x = const()[name = string("concat_212x"), val = tensor([1, -1, 16, 64])]; tensor var_2130_cast_fp16 = reshape(shape = concat_212x, x = var_2106_cast_fp16)[name = string("op_2130_cast_fp16")]; tensor var_2131 = const()[name = string("op_2131"), val = tensor([0, 2, 1, 3])]; bool qk_55_transpose_x_0 = const()[name = string("qk_55_transpose_x_0"), val = bool(false)]; bool qk_55_transpose_y_0 = const()[name = string("qk_55_transpose_y_0"), val = bool(false)]; tensor transpose_229_perm_0 = const()[name = string("transpose_229_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_230_perm_0 = const()[name = string("transpose_230_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_230 = transpose(perm = transpose_230_perm_0, x = k_95_cast_fp16)[name = string("transpose_406")]; tensor transpose_229 = transpose(perm = transpose_229_perm_0, x = q_75_cast_fp16)[name = string("transpose_407")]; tensor qk_55_cast_fp16 = matmul(transpose_x = qk_55_transpose_x_0, transpose_y = qk_55_transpose_y_0, x = transpose_229, y = transpose_230)[name = string("qk_55_cast_fp16")]; int32 concat_213_values1_0 = const()[name = string("concat_213_values1_0"), val = int32(448)]; int32 concat_213_axis_0 = const()[name = string("concat_213_axis_0"), val = int32(0)]; bool concat_213_interleave_0 = const()[name = string("concat_213_interleave_0"), val = bool(false)]; tensor concat_213 = concat(axis = concat_213_axis_0, interleave = concat_213_interleave_0, values = (gather_110_cast_uint16_to_int32, concat_213_values1_0))[name = string("concat_213")]; tensor var_2134_begin_0 = const()[name = string("op_2134_begin_0"), val = tensor([0, 0])]; tensor var_2134_end_mask_0 = const()[name = string("op_2134_end_mask_0"), val = tensor([false, true])]; tensor var_2134_cast_fp16 = slice_by_index(begin = var_2134_begin_0, end = concat_213, end_mask = var_2134_end_mask_0, x = mask_to_fp16)[name = string("op_2134_cast_fp16")]; int32 concat_214_values0_0 = const()[name = string("concat_214_values0_0"), val = int32(0)]; int32 concat_214_axis_0 = const()[name = string("concat_214_axis_0"), val = int32(0)]; bool concat_214_interleave_0 = const()[name = string("concat_214_interleave_0"), val = bool(false)]; tensor concat_214 = concat(axis = concat_214_axis_0, interleave = concat_214_interleave_0, values = (concat_214_values0_0, gather_110_cast_uint16_to_int32))[name = string("concat_214")]; tensor var_2135_begin_0 = const()[name = string("op_2135_begin_0"), val = tensor([0, 0])]; tensor var_2135_end_mask_0 = const()[name = string("op_2135_end_mask_0"), val = tensor([true, false])]; tensor var_2135_cast_fp16 = slice_by_index(begin = var_2135_begin_0, end = concat_214, end_mask = var_2135_end_mask_0, x = var_2134_cast_fp16)[name = string("op_2135_cast_fp16")]; tensor qk_57_cast_fp16 = add(x = qk_55_cast_fp16, y = var_2135_cast_fp16)[name = string("qk_57_cast_fp16")]; tensor var_2138_cast_fp16 = softmax(axis = var_2047, x = qk_57_cast_fp16)[name = string("op_2138_cast_fp16")]; bool var_2140_transpose_x_0 = const()[name = string("op_2140_transpose_x_0"), val = bool(false)]; bool var_2140_transpose_y_0 = const()[name = string("op_2140_transpose_y_0"), val = bool(false)]; tensor v_95_cast_fp16 = transpose(perm = var_2131, x = var_2130_cast_fp16)[name = string("transpose_408")]; tensor var_2140_cast_fp16 = matmul(transpose_x = var_2140_transpose_x_0, transpose_y = var_2140_transpose_y_0, x = var_2138_cast_fp16, y = v_95_cast_fp16)[name = string("op_2140_cast_fp16")]; tensor var_2141 = const()[name = string("op_2141"), val = tensor([0, 2, 1, 3])]; tensor concat_215x = const()[name = string("concat_215x"), val = tensor([1, -1, 1024])]; tensor var_2142_cast_fp16 = transpose(perm = var_2141, x = var_2140_cast_fp16)[name = string("transpose_405")]; tensor x_169_cast_fp16 = reshape(shape = concat_215x, x = var_2142_cast_fp16)[name = string("x_169_cast_fp16")]; tensor var_2146_to_fp16 = const()[name = string("op_2146_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(381461120)))]; tensor var_2147_to_fp16 = const()[name = string("op_2147_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383558336)))]; tensor linear_75_cast_fp16 = linear(bias = var_2147_to_fp16, weight = var_2146_to_fp16, x = x_169_cast_fp16)[name = string("linear_75_cast_fp16")]; tensor x_171_cast_fp16 = add(x = x_165_cast_fp16, y = linear_75_cast_fp16)[name = string("x_171_cast_fp16")]; tensor var_2154_axes_0 = const()[name = string("op_2154_axes_0"), val = tensor([-1])]; tensor blocks_9_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_9_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383560448)))]; tensor blocks_9_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_9_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383562560)))]; tensor var_2154_cast_fp16 = layer_norm(axes = var_2154_axes_0, beta = blocks_9_cross_attn_ln_bias_to_fp16, epsilon = var_2053_to_fp16, gamma = blocks_9_cross_attn_ln_weight_to_fp16, x = x_171_cast_fp16)[name = string("op_2154_cast_fp16")]; tensor var_2163_to_fp16 = const()[name = string("op_2163_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383564672)))]; tensor var_2164_to_fp16 = const()[name = string("op_2164_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(385661888)))]; tensor linear_76_cast_fp16 = linear(bias = var_2164_to_fp16, weight = var_2163_to_fp16, x = var_2154_cast_fp16)[name = string("linear_76_cast_fp16")]; tensor concat_216 = const()[name = string("concat_216"), val = tensor([0, 0, 0])]; tensor concat_217 = const()[name = string("concat_217"), val = tensor([0, 1500, 0])]; tensor k_97_internal_tensor_assign_1_stride_0 = const()[name = string("k_97_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_97_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_97_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_97_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_97_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_97_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_97_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_97_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_216, begin_mask = k_97_internal_tensor_assign_1_begin_mask_0, end = concat_217, end_mask = k_97_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_97_internal_tensor_assign_1_squeeze_mask_0, stride = k_97_internal_tensor_assign_1_stride_0, update = k_cache_39_cast_fp16, x = k_7_to_fp16)[name = string("k_97_internal_tensor_assign_1_cast_fp16")]; tensor concat_218 = const()[name = string("concat_218"), val = tensor([0, 0, 0])]; tensor concat_219 = const()[name = string("concat_219"), val = tensor([0, 1500, 0])]; tensor v_97_internal_tensor_assign_1_stride_0 = const()[name = string("v_97_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_97_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_97_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_97_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_97_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_97_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_97_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_97_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_218, begin_mask = v_97_internal_tensor_assign_1_begin_mask_0, end = concat_219, end_mask = v_97_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_97_internal_tensor_assign_1_squeeze_mask_0, stride = v_97_internal_tensor_assign_1_stride_0, update = v_cache_39_cast_fp16, x = k_7_to_fp16)[name = string("v_97_internal_tensor_assign_1_cast_fp16")]; tensor concat_220x = const()[name = string("concat_220x"), val = tensor([1, -1, 16, 64])]; tensor var_2184_cast_fp16 = reshape(shape = concat_220x, x = linear_76_cast_fp16)[name = string("op_2184_cast_fp16")]; tensor const_158_to_fp16 = const()[name = string("const_158_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_79_cast_fp16 = mul(x = var_2184_cast_fp16, y = const_158_to_fp16)[name = string("q_79_cast_fp16")]; tensor var_2190 = const()[name = string("op_2190"), val = tensor([1, 1500, 16, -1])]; tensor var_2191_cast_fp16 = reshape(shape = var_2190, x = k_97_internal_tensor_assign_1_cast_fp16)[name = string("op_2191_cast_fp16")]; tensor const_159_to_fp16 = const()[name = string("const_159_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_99_cast_fp16 = mul(x = var_2191_cast_fp16, y = const_159_to_fp16)[name = string("k_99_cast_fp16")]; tensor var_2197 = const()[name = string("op_2197"), val = tensor([1, 1500, 16, -1])]; tensor var_2198_cast_fp16 = reshape(shape = var_2197, x = v_97_internal_tensor_assign_1_cast_fp16)[name = string("op_2198_cast_fp16")]; tensor var_2199 = const()[name = string("op_2199"), val = tensor([0, 2, 1, 3])]; bool qk_59_transpose_x_0 = const()[name = string("qk_59_transpose_x_0"), val = bool(false)]; bool qk_59_transpose_y_0 = const()[name = string("qk_59_transpose_y_0"), val = bool(false)]; tensor transpose_231_perm_0 = const()[name = string("transpose_231_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_232_perm_0 = const()[name = string("transpose_232_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_232 = transpose(perm = transpose_232_perm_0, x = k_99_cast_fp16)[name = string("transpose_402")]; tensor transpose_231 = transpose(perm = transpose_231_perm_0, x = q_79_cast_fp16)[name = string("transpose_403")]; tensor qk_59_cast_fp16 = matmul(transpose_x = qk_59_transpose_x_0, transpose_y = qk_59_transpose_y_0, x = transpose_231, y = transpose_232)[name = string("qk_59_cast_fp16")]; tensor var_2203_cast_fp16 = softmax(axis = var_2047, x = qk_59_cast_fp16)[name = string("op_2203_cast_fp16")]; bool var_2205_transpose_x_0 = const()[name = string("op_2205_transpose_x_0"), val = bool(false)]; bool var_2205_transpose_y_0 = const()[name = string("op_2205_transpose_y_0"), val = bool(false)]; tensor v_99_cast_fp16 = transpose(perm = var_2199, x = var_2198_cast_fp16)[name = string("transpose_404")]; tensor var_2205_cast_fp16 = matmul(transpose_x = var_2205_transpose_x_0, transpose_y = var_2205_transpose_y_0, x = var_2203_cast_fp16, y = v_99_cast_fp16)[name = string("op_2205_cast_fp16")]; tensor var_2206 = const()[name = string("op_2206"), val = tensor([0, 2, 1, 3])]; tensor concat_221x = const()[name = string("concat_221x"), val = tensor([1, -1, 1024])]; tensor var_2207_cast_fp16 = transpose(perm = var_2206, x = var_2205_cast_fp16)[name = string("transpose_401")]; tensor x_175_cast_fp16 = reshape(shape = concat_221x, x = var_2207_cast_fp16)[name = string("x_175_cast_fp16")]; tensor var_2211_to_fp16 = const()[name = string("op_2211_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(385664000)))]; tensor var_2212_to_fp16 = const()[name = string("op_2212_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387761216)))]; tensor linear_77_cast_fp16 = linear(bias = var_2212_to_fp16, weight = var_2211_to_fp16, x = x_175_cast_fp16)[name = string("linear_77_cast_fp16")]; tensor x_177_cast_fp16 = add(x = x_171_cast_fp16, y = linear_77_cast_fp16)[name = string("x_177_cast_fp16")]; tensor var_2219_axes_0 = const()[name = string("op_2219_axes_0"), val = tensor([-1])]; tensor blocks_9_mlp_ln_weight_to_fp16 = const()[name = string("blocks_9_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387763328)))]; tensor blocks_9_mlp_ln_bias_to_fp16 = const()[name = string("blocks_9_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387765440)))]; tensor var_2219_cast_fp16 = layer_norm(axes = var_2219_axes_0, beta = blocks_9_mlp_ln_bias_to_fp16, epsilon = var_2053_to_fp16, gamma = blocks_9_mlp_ln_weight_to_fp16, x = x_177_cast_fp16)[name = string("op_2219_cast_fp16")]; tensor var_2228_to_fp16 = const()[name = string("op_2228_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387767552)))]; tensor var_2229_to_fp16 = const()[name = string("op_2229_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396156224)))]; tensor linear_78_cast_fp16 = linear(bias = var_2229_to_fp16, weight = var_2228_to_fp16, x = var_2219_cast_fp16)[name = string("linear_78_cast_fp16")]; string x_181_mode_0 = const()[name = string("x_181_mode_0"), val = string("EXACT")]; tensor x_181_cast_fp16 = gelu(mode = x_181_mode_0, x = linear_78_cast_fp16)[name = string("x_181_cast_fp16")]; tensor var_2234_to_fp16 = const()[name = string("op_2234_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396164480)))]; tensor var_2235_to_fp16 = const()[name = string("op_2235_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(404553152)))]; tensor linear_79_cast_fp16 = linear(bias = var_2235_to_fp16, weight = var_2234_to_fp16, x = x_181_cast_fp16)[name = string("linear_79_cast_fp16")]; tensor x_183_cast_fp16 = add(x = x_177_cast_fp16, y = linear_79_cast_fp16)[name = string("x_183_cast_fp16")]; tensor k_cache_41_begin_0 = const()[name = string("k_cache_41_begin_0"), val = tensor([10, 0, 0, 0])]; tensor k_cache_41_end_0 = const()[name = string("k_cache_41_end_0"), val = tensor([11, 1, 448, 1024])]; tensor k_cache_41_end_mask_0 = const()[name = string("k_cache_41_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_41_squeeze_mask_0 = const()[name = string("k_cache_41_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_41_cast_fp16 = slice_by_index(begin = k_cache_41_begin_0, end = k_cache_41_end_0, end_mask = k_cache_41_end_mask_0, squeeze_mask = k_cache_41_squeeze_mask_0, x = coreml_update_state_66)[name = string("k_cache_41_cast_fp16")]; tensor v_cache_41_begin_0 = const()[name = string("v_cache_41_begin_0"), val = tensor([10, 0, 0, 0])]; tensor v_cache_41_end_0 = const()[name = string("v_cache_41_end_0"), val = tensor([11, 1, 448, 1024])]; tensor v_cache_41_end_mask_0 = const()[name = string("v_cache_41_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_41_squeeze_mask_0 = const()[name = string("v_cache_41_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_41_cast_fp16 = slice_by_index(begin = v_cache_41_begin_0, end = v_cache_41_end_0, end_mask = v_cache_41_end_mask_0, squeeze_mask = v_cache_41_squeeze_mask_0, x = coreml_update_state_67)[name = string("v_cache_41_cast_fp16")]; tensor k_cache_43_begin_0 = const()[name = string("k_cache_43_begin_0"), val = tensor([10, 0, 0, 0])]; tensor k_cache_43_end_0 = const()[name = string("k_cache_43_end_0"), val = tensor([11, 1, 1500, 1024])]; tensor k_cache_43_end_mask_0 = const()[name = string("k_cache_43_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_43_squeeze_mask_0 = const()[name = string("k_cache_43_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_43_cast_fp16 = slice_by_index(begin = k_cache_43_begin_0, end = k_cache_43_end_0, end_mask = k_cache_43_end_mask_0, squeeze_mask = k_cache_43_squeeze_mask_0, x = read_state_2)[name = string("k_cache_43_cast_fp16")]; tensor v_cache_43_begin_0 = const()[name = string("v_cache_43_begin_0"), val = tensor([10, 0, 0, 0])]; tensor v_cache_43_end_0 = const()[name = string("v_cache_43_end_0"), val = tensor([11, 1, 1500, 1024])]; tensor v_cache_43_end_mask_0 = const()[name = string("v_cache_43_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_43_squeeze_mask_0 = const()[name = string("v_cache_43_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_43_cast_fp16 = slice_by_index(begin = v_cache_43_begin_0, end = v_cache_43_end_0, end_mask = v_cache_43_end_mask_0, squeeze_mask = v_cache_43_squeeze_mask_0, x = read_state_3)[name = string("v_cache_43_cast_fp16")]; int32 var_2258 = const()[name = string("op_2258"), val = int32(-1)]; tensor var_2276_axes_0 = const()[name = string("op_2276_axes_0"), val = tensor([-1])]; tensor blocks_10_attn_ln_weight_to_fp16 = const()[name = string("blocks_10_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(404555264)))]; tensor blocks_10_attn_ln_bias_to_fp16 = const()[name = string("blocks_10_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(404557376)))]; fp16 var_2264_to_fp16 = const()[name = string("op_2264_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_2276_cast_fp16 = layer_norm(axes = var_2276_axes_0, beta = blocks_10_attn_ln_bias_to_fp16, epsilon = var_2264_to_fp16, gamma = blocks_10_attn_ln_weight_to_fp16, x = x_183_cast_fp16)[name = string("op_2276_cast_fp16")]; tensor var_2287_to_fp16 = const()[name = string("op_2287_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(404559488)))]; tensor var_2288_to_fp16 = const()[name = string("op_2288_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406656704)))]; tensor linear_80_cast_fp16 = linear(bias = var_2288_to_fp16, weight = var_2287_to_fp16, x = var_2276_cast_fp16)[name = string("linear_80_cast_fp16")]; tensor var_2291_to_fp16 = const()[name = string("op_2291_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406658816)))]; tensor linear_81_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2291_to_fp16, x = var_2276_cast_fp16)[name = string("linear_81_cast_fp16")]; tensor var_2295_to_fp16 = const()[name = string("op_2295_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408756032)))]; tensor var_2296_to_fp16 = const()[name = string("op_2296_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410853248)))]; tensor linear_82_cast_fp16 = linear(bias = var_2296_to_fp16, weight = var_2295_to_fp16, x = var_2276_cast_fp16)[name = string("linear_82_cast_fp16")]; tensor var_2298_shape_cast_fp16 = shape(x = linear_80_cast_fp16)[name = string("op_2298_shape_cast_fp16")]; int32 gather_122_axis_0 = const()[name = string("gather_122_axis_0"), val = int32(0)]; int32 gather_122_batch_dims_0 = const()[name = string("gather_122_batch_dims_0"), val = int32(0)]; bool gather_122_validate_indices_0 = const()[name = string("gather_122_validate_indices_0"), val = bool(false)]; string var_2298_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2298_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_122_to_uint16 = const()[name = string("select_122_to_uint16"), val = uint16(1)]; tensor var_2298_shape_cast_fp16_to_uint16 = cast(dtype = var_2298_shape_cast_fp16_to_uint16_dtype_0, x = var_2298_shape_cast_fp16)[name = string("cast_274")]; uint16 gather_122_cast_uint16 = gather(axis = gather_122_axis_0, batch_dims = gather_122_batch_dims_0, indices = select_122_to_uint16, validate_indices = gather_122_validate_indices_0, x = var_2298_shape_cast_fp16_to_uint16)[name = string("gather_122_cast_uint16")]; string gather_122_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_122_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_122_cast_uint16_to_int32 = cast(dtype = gather_122_cast_uint16_to_int32_dtype_0, x = gather_122_cast_uint16)[name = string("cast_273")]; int32 end_step_23 = add(x = offset, y = gather_122_cast_uint16_to_int32)[name = string("end_step_23")]; tensor expand_dims_160 = const()[name = string("expand_dims_160"), val = tensor([0])]; tensor expand_dims_162 = const()[name = string("expand_dims_162"), val = tensor([0])]; tensor expand_dims_163_axes_0 = const()[name = string("expand_dims_163_axes_0"), val = tensor([0])]; tensor expand_dims_163 = expand_dims(axes = expand_dims_163_axes_0, x = end_step_23)[name = string("expand_dims_163")]; tensor concat_224_values0_0 = const()[name = string("concat_224_values0_0"), val = tensor([10])]; int32 concat_224_axis_0 = const()[name = string("concat_224_axis_0"), val = int32(0)]; bool concat_224_interleave_0 = const()[name = string("concat_224_interleave_0"), val = bool(false)]; tensor concat_224 = concat(axis = concat_224_axis_0, interleave = concat_224_interleave_0, values = (concat_224_values0_0, expand_dims_160, expand_dims_1, expand_dims_162))[name = string("concat_224")]; tensor concat_225_values0_0 = const()[name = string("concat_225_values0_0"), val = tensor([0])]; tensor concat_225_values1_0 = const()[name = string("concat_225_values1_0"), val = tensor([0])]; tensor concat_225_values3_0 = const()[name = string("concat_225_values3_0"), val = tensor([0])]; int32 concat_225_axis_0 = const()[name = string("concat_225_axis_0"), val = int32(0)]; bool concat_225_interleave_0 = const()[name = string("concat_225_interleave_0"), val = bool(false)]; tensor concat_225 = concat(axis = concat_225_axis_0, interleave = concat_225_interleave_0, values = (concat_225_values0_0, concat_225_values1_0, expand_dims_163, concat_225_values3_0))[name = string("concat_225")]; tensor k_cache1_internal_tensor_assign_11_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_11_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_11_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_224, begin_mask = k_cache1_internal_tensor_assign_11_begin_mask_0, end = concat_225, end_mask = k_cache1_internal_tensor_assign_11_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_11_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_11_stride_0, update = linear_81_cast_fp16, x = coreml_update_state_66)[name = string("k_cache1_internal_tensor_assign_11_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_11_cast_fp16, input = k_cache1)[name = string("coreml_update_state_68_write_state")]; tensor coreml_update_state_68 = read_state(input = k_cache1)[name = string("coreml_update_state_68")]; tensor v_cache1_internal_tensor_assign_11_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_11_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_11_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_224, begin_mask = v_cache1_internal_tensor_assign_11_begin_mask_0, end = concat_225, end_mask = v_cache1_internal_tensor_assign_11_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_11_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_11_stride_0, update = linear_82_cast_fp16, x = coreml_update_state_67)[name = string("v_cache1_internal_tensor_assign_11_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_11_cast_fp16, input = v_cache1)[name = string("coreml_update_state_69_write_state")]; tensor coreml_update_state_69 = read_state(input = v_cache1)[name = string("coreml_update_state_69")]; int32 concat_230_values0_0 = const()[name = string("concat_230_values0_0"), val = int32(1)]; int32 concat_230_values2_0 = const()[name = string("concat_230_values2_0"), val = int32(1024)]; int32 concat_230_axis_0 = const()[name = string("concat_230_axis_0"), val = int32(0)]; bool concat_230_interleave_0 = const()[name = string("concat_230_interleave_0"), val = bool(false)]; tensor concat_230 = concat(axis = concat_230_axis_0, interleave = concat_230_interleave_0, values = (concat_230_values0_0, end_step_23, concat_230_values2_0))[name = string("concat_230")]; tensor var_2314_begin_0 = const()[name = string("op_2314_begin_0"), val = tensor([0, 0, 0])]; tensor var_2314_end_mask_0 = const()[name = string("op_2314_end_mask_0"), val = tensor([true, false, true])]; tensor var_2314_cast_fp16 = slice_by_index(begin = var_2314_begin_0, end = concat_230, end_mask = var_2314_end_mask_0, x = k_cache_41_cast_fp16)[name = string("op_2314_cast_fp16")]; tensor var_2317_begin_0 = const()[name = string("op_2317_begin_0"), val = tensor([0, 0, 0])]; tensor var_2317_end_mask_0 = const()[name = string("op_2317_end_mask_0"), val = tensor([true, false, true])]; tensor var_2317_cast_fp16 = slice_by_index(begin = var_2317_begin_0, end = concat_230, end_mask = var_2317_end_mask_0, x = v_cache_41_cast_fp16)[name = string("op_2317_cast_fp16")]; tensor concat_232x = const()[name = string("concat_232x"), val = tensor([1, -1, 16, 64])]; tensor var_2327_cast_fp16 = reshape(shape = concat_232x, x = linear_80_cast_fp16)[name = string("op_2327_cast_fp16")]; tensor const_160_to_fp16 = const()[name = string("const_160_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_83_cast_fp16 = mul(x = var_2327_cast_fp16, y = const_160_to_fp16)[name = string("q_83_cast_fp16")]; tensor concat_233x = const()[name = string("concat_233x"), val = tensor([1, -1, 16, 64])]; tensor var_2334_cast_fp16 = reshape(shape = concat_233x, x = var_2314_cast_fp16)[name = string("op_2334_cast_fp16")]; tensor const_161_to_fp16 = const()[name = string("const_161_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_105_cast_fp16 = mul(x = var_2334_cast_fp16, y = const_161_to_fp16)[name = string("k_105_cast_fp16")]; tensor concat_234x = const()[name = string("concat_234x"), val = tensor([1, -1, 16, 64])]; tensor var_2341_cast_fp16 = reshape(shape = concat_234x, x = var_2317_cast_fp16)[name = string("op_2341_cast_fp16")]; tensor var_2342 = const()[name = string("op_2342"), val = tensor([0, 2, 1, 3])]; bool qk_61_transpose_x_0 = const()[name = string("qk_61_transpose_x_0"), val = bool(false)]; bool qk_61_transpose_y_0 = const()[name = string("qk_61_transpose_y_0"), val = bool(false)]; tensor transpose_233_perm_0 = const()[name = string("transpose_233_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_234_perm_0 = const()[name = string("transpose_234_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_234 = transpose(perm = transpose_234_perm_0, x = k_105_cast_fp16)[name = string("transpose_398")]; tensor transpose_233 = transpose(perm = transpose_233_perm_0, x = q_83_cast_fp16)[name = string("transpose_399")]; tensor qk_61_cast_fp16 = matmul(transpose_x = qk_61_transpose_x_0, transpose_y = qk_61_transpose_y_0, x = transpose_233, y = transpose_234)[name = string("qk_61_cast_fp16")]; int32 concat_235_values1_0 = const()[name = string("concat_235_values1_0"), val = int32(448)]; int32 concat_235_axis_0 = const()[name = string("concat_235_axis_0"), val = int32(0)]; bool concat_235_interleave_0 = const()[name = string("concat_235_interleave_0"), val = bool(false)]; tensor concat_235 = concat(axis = concat_235_axis_0, interleave = concat_235_interleave_0, values = (gather_122_cast_uint16_to_int32, concat_235_values1_0))[name = string("concat_235")]; tensor var_2345_begin_0 = const()[name = string("op_2345_begin_0"), val = tensor([0, 0])]; tensor var_2345_end_mask_0 = const()[name = string("op_2345_end_mask_0"), val = tensor([false, true])]; tensor var_2345_cast_fp16 = slice_by_index(begin = var_2345_begin_0, end = concat_235, end_mask = var_2345_end_mask_0, x = mask_to_fp16)[name = string("op_2345_cast_fp16")]; int32 concat_236_values0_0 = const()[name = string("concat_236_values0_0"), val = int32(0)]; int32 concat_236_axis_0 = const()[name = string("concat_236_axis_0"), val = int32(0)]; bool concat_236_interleave_0 = const()[name = string("concat_236_interleave_0"), val = bool(false)]; tensor concat_236 = concat(axis = concat_236_axis_0, interleave = concat_236_interleave_0, values = (concat_236_values0_0, gather_122_cast_uint16_to_int32))[name = string("concat_236")]; tensor var_2346_begin_0 = const()[name = string("op_2346_begin_0"), val = tensor([0, 0])]; tensor var_2346_end_mask_0 = const()[name = string("op_2346_end_mask_0"), val = tensor([true, false])]; tensor var_2346_cast_fp16 = slice_by_index(begin = var_2346_begin_0, end = concat_236, end_mask = var_2346_end_mask_0, x = var_2345_cast_fp16)[name = string("op_2346_cast_fp16")]; tensor qk_63_cast_fp16 = add(x = qk_61_cast_fp16, y = var_2346_cast_fp16)[name = string("qk_63_cast_fp16")]; tensor var_2349_cast_fp16 = softmax(axis = var_2258, x = qk_63_cast_fp16)[name = string("op_2349_cast_fp16")]; bool var_2351_transpose_x_0 = const()[name = string("op_2351_transpose_x_0"), val = bool(false)]; bool var_2351_transpose_y_0 = const()[name = string("op_2351_transpose_y_0"), val = bool(false)]; tensor v_105_cast_fp16 = transpose(perm = var_2342, x = var_2341_cast_fp16)[name = string("transpose_400")]; tensor var_2351_cast_fp16 = matmul(transpose_x = var_2351_transpose_x_0, transpose_y = var_2351_transpose_y_0, x = var_2349_cast_fp16, y = v_105_cast_fp16)[name = string("op_2351_cast_fp16")]; tensor var_2352 = const()[name = string("op_2352"), val = tensor([0, 2, 1, 3])]; tensor concat_237x = const()[name = string("concat_237x"), val = tensor([1, -1, 1024])]; tensor var_2353_cast_fp16 = transpose(perm = var_2352, x = var_2351_cast_fp16)[name = string("transpose_397")]; tensor x_187_cast_fp16 = reshape(shape = concat_237x, x = var_2353_cast_fp16)[name = string("x_187_cast_fp16")]; tensor var_2357_to_fp16 = const()[name = string("op_2357_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410855360)))]; tensor var_2358_to_fp16 = const()[name = string("op_2358_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412952576)))]; tensor linear_83_cast_fp16 = linear(bias = var_2358_to_fp16, weight = var_2357_to_fp16, x = x_187_cast_fp16)[name = string("linear_83_cast_fp16")]; tensor x_189_cast_fp16 = add(x = x_183_cast_fp16, y = linear_83_cast_fp16)[name = string("x_189_cast_fp16")]; tensor var_2365_axes_0 = const()[name = string("op_2365_axes_0"), val = tensor([-1])]; tensor blocks_10_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_10_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412954688)))]; tensor blocks_10_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_10_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412956800)))]; tensor var_2365_cast_fp16 = layer_norm(axes = var_2365_axes_0, beta = blocks_10_cross_attn_ln_bias_to_fp16, epsilon = var_2264_to_fp16, gamma = blocks_10_cross_attn_ln_weight_to_fp16, x = x_189_cast_fp16)[name = string("op_2365_cast_fp16")]; tensor var_2374_to_fp16 = const()[name = string("op_2374_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412958912)))]; tensor var_2375_to_fp16 = const()[name = string("op_2375_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415056128)))]; tensor linear_84_cast_fp16 = linear(bias = var_2375_to_fp16, weight = var_2374_to_fp16, x = var_2365_cast_fp16)[name = string("linear_84_cast_fp16")]; tensor concat_238 = const()[name = string("concat_238"), val = tensor([0, 0, 0])]; tensor concat_239 = const()[name = string("concat_239"), val = tensor([0, 1500, 0])]; tensor k_107_internal_tensor_assign_1_stride_0 = const()[name = string("k_107_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_107_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_107_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_107_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_107_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_107_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_107_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_107_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_238, begin_mask = k_107_internal_tensor_assign_1_begin_mask_0, end = concat_239, end_mask = k_107_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_107_internal_tensor_assign_1_squeeze_mask_0, stride = k_107_internal_tensor_assign_1_stride_0, update = k_cache_43_cast_fp16, x = k_7_to_fp16)[name = string("k_107_internal_tensor_assign_1_cast_fp16")]; tensor concat_240 = const()[name = string("concat_240"), val = tensor([0, 0, 0])]; tensor concat_241 = const()[name = string("concat_241"), val = tensor([0, 1500, 0])]; tensor v_107_internal_tensor_assign_1_stride_0 = const()[name = string("v_107_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_107_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_107_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_107_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_107_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_107_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_107_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_107_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_240, begin_mask = v_107_internal_tensor_assign_1_begin_mask_0, end = concat_241, end_mask = v_107_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_107_internal_tensor_assign_1_squeeze_mask_0, stride = v_107_internal_tensor_assign_1_stride_0, update = v_cache_43_cast_fp16, x = k_7_to_fp16)[name = string("v_107_internal_tensor_assign_1_cast_fp16")]; tensor concat_242x = const()[name = string("concat_242x"), val = tensor([1, -1, 16, 64])]; tensor var_2395_cast_fp16 = reshape(shape = concat_242x, x = linear_84_cast_fp16)[name = string("op_2395_cast_fp16")]; tensor const_162_to_fp16 = const()[name = string("const_162_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_87_cast_fp16 = mul(x = var_2395_cast_fp16, y = const_162_to_fp16)[name = string("q_87_cast_fp16")]; tensor var_2401 = const()[name = string("op_2401"), val = tensor([1, 1500, 16, -1])]; tensor var_2402_cast_fp16 = reshape(shape = var_2401, x = k_107_internal_tensor_assign_1_cast_fp16)[name = string("op_2402_cast_fp16")]; tensor const_163_to_fp16 = const()[name = string("const_163_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_109_cast_fp16 = mul(x = var_2402_cast_fp16, y = const_163_to_fp16)[name = string("k_109_cast_fp16")]; tensor var_2408 = const()[name = string("op_2408"), val = tensor([1, 1500, 16, -1])]; tensor var_2409_cast_fp16 = reshape(shape = var_2408, x = v_107_internal_tensor_assign_1_cast_fp16)[name = string("op_2409_cast_fp16")]; tensor var_2410 = const()[name = string("op_2410"), val = tensor([0, 2, 1, 3])]; bool qk_65_transpose_x_0 = const()[name = string("qk_65_transpose_x_0"), val = bool(false)]; bool qk_65_transpose_y_0 = const()[name = string("qk_65_transpose_y_0"), val = bool(false)]; tensor transpose_235_perm_0 = const()[name = string("transpose_235_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_236_perm_0 = const()[name = string("transpose_236_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_236 = transpose(perm = transpose_236_perm_0, x = k_109_cast_fp16)[name = string("transpose_394")]; tensor transpose_235 = transpose(perm = transpose_235_perm_0, x = q_87_cast_fp16)[name = string("transpose_395")]; tensor qk_65_cast_fp16 = matmul(transpose_x = qk_65_transpose_x_0, transpose_y = qk_65_transpose_y_0, x = transpose_235, y = transpose_236)[name = string("qk_65_cast_fp16")]; tensor var_2414_cast_fp16 = softmax(axis = var_2258, x = qk_65_cast_fp16)[name = string("op_2414_cast_fp16")]; bool var_2416_transpose_x_0 = const()[name = string("op_2416_transpose_x_0"), val = bool(false)]; bool var_2416_transpose_y_0 = const()[name = string("op_2416_transpose_y_0"), val = bool(false)]; tensor v_109_cast_fp16 = transpose(perm = var_2410, x = var_2409_cast_fp16)[name = string("transpose_396")]; tensor var_2416_cast_fp16 = matmul(transpose_x = var_2416_transpose_x_0, transpose_y = var_2416_transpose_y_0, x = var_2414_cast_fp16, y = v_109_cast_fp16)[name = string("op_2416_cast_fp16")]; tensor var_2417 = const()[name = string("op_2417"), val = tensor([0, 2, 1, 3])]; tensor concat_243x = const()[name = string("concat_243x"), val = tensor([1, -1, 1024])]; tensor var_2418_cast_fp16 = transpose(perm = var_2417, x = var_2416_cast_fp16)[name = string("transpose_393")]; tensor x_193_cast_fp16 = reshape(shape = concat_243x, x = var_2418_cast_fp16)[name = string("x_193_cast_fp16")]; tensor var_2422_to_fp16 = const()[name = string("op_2422_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415058240)))]; tensor var_2423_to_fp16 = const()[name = string("op_2423_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(417155456)))]; tensor linear_85_cast_fp16 = linear(bias = var_2423_to_fp16, weight = var_2422_to_fp16, x = x_193_cast_fp16)[name = string("linear_85_cast_fp16")]; tensor x_195_cast_fp16 = add(x = x_189_cast_fp16, y = linear_85_cast_fp16)[name = string("x_195_cast_fp16")]; tensor var_2430_axes_0 = const()[name = string("op_2430_axes_0"), val = tensor([-1])]; tensor blocks_10_mlp_ln_weight_to_fp16 = const()[name = string("blocks_10_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(417157568)))]; tensor blocks_10_mlp_ln_bias_to_fp16 = const()[name = string("blocks_10_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(417159680)))]; tensor var_2430_cast_fp16 = layer_norm(axes = var_2430_axes_0, beta = blocks_10_mlp_ln_bias_to_fp16, epsilon = var_2264_to_fp16, gamma = blocks_10_mlp_ln_weight_to_fp16, x = x_195_cast_fp16)[name = string("op_2430_cast_fp16")]; tensor var_2439_to_fp16 = const()[name = string("op_2439_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(417161792)))]; tensor var_2440_to_fp16 = const()[name = string("op_2440_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(425550464)))]; tensor linear_86_cast_fp16 = linear(bias = var_2440_to_fp16, weight = var_2439_to_fp16, x = var_2430_cast_fp16)[name = string("linear_86_cast_fp16")]; string x_199_mode_0 = const()[name = string("x_199_mode_0"), val = string("EXACT")]; tensor x_199_cast_fp16 = gelu(mode = x_199_mode_0, x = linear_86_cast_fp16)[name = string("x_199_cast_fp16")]; tensor var_2445_to_fp16 = const()[name = string("op_2445_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(425558720)))]; tensor var_2446_to_fp16 = const()[name = string("op_2446_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433947392)))]; tensor linear_87_cast_fp16 = linear(bias = var_2446_to_fp16, weight = var_2445_to_fp16, x = x_199_cast_fp16)[name = string("linear_87_cast_fp16")]; tensor x_201_cast_fp16 = add(x = x_195_cast_fp16, y = linear_87_cast_fp16)[name = string("x_201_cast_fp16")]; tensor k_cache_45_begin_0 = const()[name = string("k_cache_45_begin_0"), val = tensor([11, 0, 0, 0])]; tensor k_cache_45_end_0 = const()[name = string("k_cache_45_end_0"), val = tensor([12, 1, 448, 1024])]; tensor k_cache_45_end_mask_0 = const()[name = string("k_cache_45_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_45_squeeze_mask_0 = const()[name = string("k_cache_45_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_45_cast_fp16 = slice_by_index(begin = k_cache_45_begin_0, end = k_cache_45_end_0, end_mask = k_cache_45_end_mask_0, squeeze_mask = k_cache_45_squeeze_mask_0, x = coreml_update_state_68)[name = string("k_cache_45_cast_fp16")]; tensor v_cache_45_begin_0 = const()[name = string("v_cache_45_begin_0"), val = tensor([11, 0, 0, 0])]; tensor v_cache_45_end_0 = const()[name = string("v_cache_45_end_0"), val = tensor([12, 1, 448, 1024])]; tensor v_cache_45_end_mask_0 = const()[name = string("v_cache_45_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_45_squeeze_mask_0 = const()[name = string("v_cache_45_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_45_cast_fp16 = slice_by_index(begin = v_cache_45_begin_0, end = v_cache_45_end_0, end_mask = v_cache_45_end_mask_0, squeeze_mask = v_cache_45_squeeze_mask_0, x = coreml_update_state_69)[name = string("v_cache_45_cast_fp16")]; tensor k_cache_47_begin_0 = const()[name = string("k_cache_47_begin_0"), val = tensor([11, 0, 0, 0])]; tensor k_cache_47_end_0 = const()[name = string("k_cache_47_end_0"), val = tensor([12, 1, 1500, 1024])]; tensor k_cache_47_end_mask_0 = const()[name = string("k_cache_47_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_47_squeeze_mask_0 = const()[name = string("k_cache_47_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_47_cast_fp16 = slice_by_index(begin = k_cache_47_begin_0, end = k_cache_47_end_0, end_mask = k_cache_47_end_mask_0, squeeze_mask = k_cache_47_squeeze_mask_0, x = read_state_2)[name = string("k_cache_47_cast_fp16")]; tensor v_cache_47_begin_0 = const()[name = string("v_cache_47_begin_0"), val = tensor([11, 0, 0, 0])]; tensor v_cache_47_end_0 = const()[name = string("v_cache_47_end_0"), val = tensor([12, 1, 1500, 1024])]; tensor v_cache_47_end_mask_0 = const()[name = string("v_cache_47_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_47_squeeze_mask_0 = const()[name = string("v_cache_47_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_47_cast_fp16 = slice_by_index(begin = v_cache_47_begin_0, end = v_cache_47_end_0, end_mask = v_cache_47_end_mask_0, squeeze_mask = v_cache_47_squeeze_mask_0, x = read_state_3)[name = string("v_cache_47_cast_fp16")]; int32 var_2469 = const()[name = string("op_2469"), val = int32(-1)]; tensor var_2487_axes_0 = const()[name = string("op_2487_axes_0"), val = tensor([-1])]; tensor blocks_11_attn_ln_weight_to_fp16 = const()[name = string("blocks_11_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433949504)))]; tensor blocks_11_attn_ln_bias_to_fp16 = const()[name = string("blocks_11_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433951616)))]; fp16 var_2475_to_fp16 = const()[name = string("op_2475_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_2487_cast_fp16 = layer_norm(axes = var_2487_axes_0, beta = blocks_11_attn_ln_bias_to_fp16, epsilon = var_2475_to_fp16, gamma = blocks_11_attn_ln_weight_to_fp16, x = x_201_cast_fp16)[name = string("op_2487_cast_fp16")]; tensor var_2498_to_fp16 = const()[name = string("op_2498_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433953728)))]; tensor var_2499_to_fp16 = const()[name = string("op_2499_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(436050944)))]; tensor linear_88_cast_fp16 = linear(bias = var_2499_to_fp16, weight = var_2498_to_fp16, x = var_2487_cast_fp16)[name = string("linear_88_cast_fp16")]; tensor var_2502_to_fp16 = const()[name = string("op_2502_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(436053056)))]; tensor linear_89_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2502_to_fp16, x = var_2487_cast_fp16)[name = string("linear_89_cast_fp16")]; tensor var_2506_to_fp16 = const()[name = string("op_2506_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438150272)))]; tensor var_2507_to_fp16 = const()[name = string("op_2507_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440247488)))]; tensor linear_90_cast_fp16 = linear(bias = var_2507_to_fp16, weight = var_2506_to_fp16, x = var_2487_cast_fp16)[name = string("linear_90_cast_fp16")]; tensor var_2509_shape_cast_fp16 = shape(x = linear_88_cast_fp16)[name = string("op_2509_shape_cast_fp16")]; int32 gather_134_axis_0 = const()[name = string("gather_134_axis_0"), val = int32(0)]; int32 gather_134_batch_dims_0 = const()[name = string("gather_134_batch_dims_0"), val = int32(0)]; bool gather_134_validate_indices_0 = const()[name = string("gather_134_validate_indices_0"), val = bool(false)]; string var_2509_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2509_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_134_to_uint16 = const()[name = string("select_134_to_uint16"), val = uint16(1)]; tensor var_2509_shape_cast_fp16_to_uint16 = cast(dtype = var_2509_shape_cast_fp16_to_uint16_dtype_0, x = var_2509_shape_cast_fp16)[name = string("cast_272")]; uint16 gather_134_cast_uint16 = gather(axis = gather_134_axis_0, batch_dims = gather_134_batch_dims_0, indices = select_134_to_uint16, validate_indices = gather_134_validate_indices_0, x = var_2509_shape_cast_fp16_to_uint16)[name = string("gather_134_cast_uint16")]; string gather_134_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_134_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_134_cast_uint16_to_int32 = cast(dtype = gather_134_cast_uint16_to_int32_dtype_0, x = gather_134_cast_uint16)[name = string("cast_271")]; int32 end_step_25 = add(x = offset, y = gather_134_cast_uint16_to_int32)[name = string("end_step_25")]; tensor expand_dims_176 = const()[name = string("expand_dims_176"), val = tensor([0])]; tensor expand_dims_178 = const()[name = string("expand_dims_178"), val = tensor([0])]; tensor expand_dims_179_axes_0 = const()[name = string("expand_dims_179_axes_0"), val = tensor([0])]; tensor expand_dims_179 = expand_dims(axes = expand_dims_179_axes_0, x = end_step_25)[name = string("expand_dims_179")]; tensor concat_246_values0_0 = const()[name = string("concat_246_values0_0"), val = tensor([11])]; int32 concat_246_axis_0 = const()[name = string("concat_246_axis_0"), val = int32(0)]; bool concat_246_interleave_0 = const()[name = string("concat_246_interleave_0"), val = bool(false)]; tensor concat_246 = concat(axis = concat_246_axis_0, interleave = concat_246_interleave_0, values = (concat_246_values0_0, expand_dims_176, expand_dims_1, expand_dims_178))[name = string("concat_246")]; tensor concat_247_values0_0 = const()[name = string("concat_247_values0_0"), val = tensor([0])]; tensor concat_247_values1_0 = const()[name = string("concat_247_values1_0"), val = tensor([0])]; tensor concat_247_values3_0 = const()[name = string("concat_247_values3_0"), val = tensor([0])]; int32 concat_247_axis_0 = const()[name = string("concat_247_axis_0"), val = int32(0)]; bool concat_247_interleave_0 = const()[name = string("concat_247_interleave_0"), val = bool(false)]; tensor concat_247 = concat(axis = concat_247_axis_0, interleave = concat_247_interleave_0, values = (concat_247_values0_0, concat_247_values1_0, expand_dims_179, concat_247_values3_0))[name = string("concat_247")]; tensor k_cache1_internal_tensor_assign_12_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_12_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_12_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_246, begin_mask = k_cache1_internal_tensor_assign_12_begin_mask_0, end = concat_247, end_mask = k_cache1_internal_tensor_assign_12_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_12_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_12_stride_0, update = linear_89_cast_fp16, x = coreml_update_state_68)[name = string("k_cache1_internal_tensor_assign_12_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_12_cast_fp16, input = k_cache1)[name = string("coreml_update_state_70_write_state")]; tensor coreml_update_state_70 = read_state(input = k_cache1)[name = string("coreml_update_state_70")]; tensor v_cache1_internal_tensor_assign_12_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_12_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_12_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_246, begin_mask = v_cache1_internal_tensor_assign_12_begin_mask_0, end = concat_247, end_mask = v_cache1_internal_tensor_assign_12_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_12_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_12_stride_0, update = linear_90_cast_fp16, x = coreml_update_state_69)[name = string("v_cache1_internal_tensor_assign_12_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_12_cast_fp16, input = v_cache1)[name = string("coreml_update_state_71_write_state")]; tensor coreml_update_state_71 = read_state(input = v_cache1)[name = string("coreml_update_state_71")]; int32 concat_252_values0_0 = const()[name = string("concat_252_values0_0"), val = int32(1)]; int32 concat_252_values2_0 = const()[name = string("concat_252_values2_0"), val = int32(1024)]; int32 concat_252_axis_0 = const()[name = string("concat_252_axis_0"), val = int32(0)]; bool concat_252_interleave_0 = const()[name = string("concat_252_interleave_0"), val = bool(false)]; tensor concat_252 = concat(axis = concat_252_axis_0, interleave = concat_252_interleave_0, values = (concat_252_values0_0, end_step_25, concat_252_values2_0))[name = string("concat_252")]; tensor var_2525_begin_0 = const()[name = string("op_2525_begin_0"), val = tensor([0, 0, 0])]; tensor var_2525_end_mask_0 = const()[name = string("op_2525_end_mask_0"), val = tensor([true, false, true])]; tensor var_2525_cast_fp16 = slice_by_index(begin = var_2525_begin_0, end = concat_252, end_mask = var_2525_end_mask_0, x = k_cache_45_cast_fp16)[name = string("op_2525_cast_fp16")]; tensor var_2528_begin_0 = const()[name = string("op_2528_begin_0"), val = tensor([0, 0, 0])]; tensor var_2528_end_mask_0 = const()[name = string("op_2528_end_mask_0"), val = tensor([true, false, true])]; tensor var_2528_cast_fp16 = slice_by_index(begin = var_2528_begin_0, end = concat_252, end_mask = var_2528_end_mask_0, x = v_cache_45_cast_fp16)[name = string("op_2528_cast_fp16")]; tensor concat_254x = const()[name = string("concat_254x"), val = tensor([1, -1, 16, 64])]; tensor var_2538_cast_fp16 = reshape(shape = concat_254x, x = linear_88_cast_fp16)[name = string("op_2538_cast_fp16")]; tensor const_164_to_fp16 = const()[name = string("const_164_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_91_cast_fp16 = mul(x = var_2538_cast_fp16, y = const_164_to_fp16)[name = string("q_91_cast_fp16")]; tensor concat_255x = const()[name = string("concat_255x"), val = tensor([1, -1, 16, 64])]; tensor var_2545_cast_fp16 = reshape(shape = concat_255x, x = var_2525_cast_fp16)[name = string("op_2545_cast_fp16")]; tensor const_165_to_fp16 = const()[name = string("const_165_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_115_cast_fp16 = mul(x = var_2545_cast_fp16, y = const_165_to_fp16)[name = string("k_115_cast_fp16")]; tensor concat_256x = const()[name = string("concat_256x"), val = tensor([1, -1, 16, 64])]; tensor var_2552_cast_fp16 = reshape(shape = concat_256x, x = var_2528_cast_fp16)[name = string("op_2552_cast_fp16")]; tensor var_2553 = const()[name = string("op_2553"), val = tensor([0, 2, 1, 3])]; bool qk_67_transpose_x_0 = const()[name = string("qk_67_transpose_x_0"), val = bool(false)]; bool qk_67_transpose_y_0 = const()[name = string("qk_67_transpose_y_0"), val = bool(false)]; tensor transpose_237_perm_0 = const()[name = string("transpose_237_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_238_perm_0 = const()[name = string("transpose_238_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_238 = transpose(perm = transpose_238_perm_0, x = k_115_cast_fp16)[name = string("transpose_390")]; tensor transpose_237 = transpose(perm = transpose_237_perm_0, x = q_91_cast_fp16)[name = string("transpose_391")]; tensor qk_67_cast_fp16 = matmul(transpose_x = qk_67_transpose_x_0, transpose_y = qk_67_transpose_y_0, x = transpose_237, y = transpose_238)[name = string("qk_67_cast_fp16")]; int32 concat_257_values1_0 = const()[name = string("concat_257_values1_0"), val = int32(448)]; int32 concat_257_axis_0 = const()[name = string("concat_257_axis_0"), val = int32(0)]; bool concat_257_interleave_0 = const()[name = string("concat_257_interleave_0"), val = bool(false)]; tensor concat_257 = concat(axis = concat_257_axis_0, interleave = concat_257_interleave_0, values = (gather_134_cast_uint16_to_int32, concat_257_values1_0))[name = string("concat_257")]; tensor var_2556_begin_0 = const()[name = string("op_2556_begin_0"), val = tensor([0, 0])]; tensor var_2556_end_mask_0 = const()[name = string("op_2556_end_mask_0"), val = tensor([false, true])]; tensor var_2556_cast_fp16 = slice_by_index(begin = var_2556_begin_0, end = concat_257, end_mask = var_2556_end_mask_0, x = mask_to_fp16)[name = string("op_2556_cast_fp16")]; int32 concat_258_values0_0 = const()[name = string("concat_258_values0_0"), val = int32(0)]; int32 concat_258_axis_0 = const()[name = string("concat_258_axis_0"), val = int32(0)]; bool concat_258_interleave_0 = const()[name = string("concat_258_interleave_0"), val = bool(false)]; tensor concat_258 = concat(axis = concat_258_axis_0, interleave = concat_258_interleave_0, values = (concat_258_values0_0, gather_134_cast_uint16_to_int32))[name = string("concat_258")]; tensor var_2557_begin_0 = const()[name = string("op_2557_begin_0"), val = tensor([0, 0])]; tensor var_2557_end_mask_0 = const()[name = string("op_2557_end_mask_0"), val = tensor([true, false])]; tensor var_2557_cast_fp16 = slice_by_index(begin = var_2557_begin_0, end = concat_258, end_mask = var_2557_end_mask_0, x = var_2556_cast_fp16)[name = string("op_2557_cast_fp16")]; tensor qk_69_cast_fp16 = add(x = qk_67_cast_fp16, y = var_2557_cast_fp16)[name = string("qk_69_cast_fp16")]; tensor var_2560_cast_fp16 = softmax(axis = var_2469, x = qk_69_cast_fp16)[name = string("op_2560_cast_fp16")]; bool var_2562_transpose_x_0 = const()[name = string("op_2562_transpose_x_0"), val = bool(false)]; bool var_2562_transpose_y_0 = const()[name = string("op_2562_transpose_y_0"), val = bool(false)]; tensor v_115_cast_fp16 = transpose(perm = var_2553, x = var_2552_cast_fp16)[name = string("transpose_392")]; tensor var_2562_cast_fp16 = matmul(transpose_x = var_2562_transpose_x_0, transpose_y = var_2562_transpose_y_0, x = var_2560_cast_fp16, y = v_115_cast_fp16)[name = string("op_2562_cast_fp16")]; tensor var_2563 = const()[name = string("op_2563"), val = tensor([0, 2, 1, 3])]; tensor concat_259x = const()[name = string("concat_259x"), val = tensor([1, -1, 1024])]; tensor var_2564_cast_fp16 = transpose(perm = var_2563, x = var_2562_cast_fp16)[name = string("transpose_389")]; tensor x_205_cast_fp16 = reshape(shape = concat_259x, x = var_2564_cast_fp16)[name = string("x_205_cast_fp16")]; tensor var_2568_to_fp16 = const()[name = string("op_2568_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440249600)))]; tensor var_2569_to_fp16 = const()[name = string("op_2569_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442346816)))]; tensor linear_91_cast_fp16 = linear(bias = var_2569_to_fp16, weight = var_2568_to_fp16, x = x_205_cast_fp16)[name = string("linear_91_cast_fp16")]; tensor x_207_cast_fp16 = add(x = x_201_cast_fp16, y = linear_91_cast_fp16)[name = string("x_207_cast_fp16")]; tensor var_2576_axes_0 = const()[name = string("op_2576_axes_0"), val = tensor([-1])]; tensor blocks_11_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_11_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442348928)))]; tensor blocks_11_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_11_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442351040)))]; tensor var_2576_cast_fp16 = layer_norm(axes = var_2576_axes_0, beta = blocks_11_cross_attn_ln_bias_to_fp16, epsilon = var_2475_to_fp16, gamma = blocks_11_cross_attn_ln_weight_to_fp16, x = x_207_cast_fp16)[name = string("op_2576_cast_fp16")]; tensor var_2585_to_fp16 = const()[name = string("op_2585_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442353152)))]; tensor var_2586_to_fp16 = const()[name = string("op_2586_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(444450368)))]; tensor linear_92_cast_fp16 = linear(bias = var_2586_to_fp16, weight = var_2585_to_fp16, x = var_2576_cast_fp16)[name = string("linear_92_cast_fp16")]; tensor concat_260 = const()[name = string("concat_260"), val = tensor([0, 0, 0])]; tensor concat_261 = const()[name = string("concat_261"), val = tensor([0, 1500, 0])]; tensor k_117_internal_tensor_assign_1_stride_0 = const()[name = string("k_117_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_117_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_117_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_117_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_117_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_117_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_117_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_117_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_260, begin_mask = k_117_internal_tensor_assign_1_begin_mask_0, end = concat_261, end_mask = k_117_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_117_internal_tensor_assign_1_squeeze_mask_0, stride = k_117_internal_tensor_assign_1_stride_0, update = k_cache_47_cast_fp16, x = k_7_to_fp16)[name = string("k_117_internal_tensor_assign_1_cast_fp16")]; tensor concat_262 = const()[name = string("concat_262"), val = tensor([0, 0, 0])]; tensor concat_263 = const()[name = string("concat_263"), val = tensor([0, 1500, 0])]; tensor v_117_internal_tensor_assign_1_stride_0 = const()[name = string("v_117_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_117_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_117_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_117_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_117_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_117_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_117_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_117_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_262, begin_mask = v_117_internal_tensor_assign_1_begin_mask_0, end = concat_263, end_mask = v_117_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_117_internal_tensor_assign_1_squeeze_mask_0, stride = v_117_internal_tensor_assign_1_stride_0, update = v_cache_47_cast_fp16, x = k_7_to_fp16)[name = string("v_117_internal_tensor_assign_1_cast_fp16")]; tensor concat_264x = const()[name = string("concat_264x"), val = tensor([1, -1, 16, 64])]; tensor var_2606_cast_fp16 = reshape(shape = concat_264x, x = linear_92_cast_fp16)[name = string("op_2606_cast_fp16")]; tensor const_166_to_fp16 = const()[name = string("const_166_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_95_cast_fp16 = mul(x = var_2606_cast_fp16, y = const_166_to_fp16)[name = string("q_95_cast_fp16")]; tensor var_2612 = const()[name = string("op_2612"), val = tensor([1, 1500, 16, -1])]; tensor var_2613_cast_fp16 = reshape(shape = var_2612, x = k_117_internal_tensor_assign_1_cast_fp16)[name = string("op_2613_cast_fp16")]; tensor const_167_to_fp16 = const()[name = string("const_167_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_119_cast_fp16 = mul(x = var_2613_cast_fp16, y = const_167_to_fp16)[name = string("k_119_cast_fp16")]; tensor var_2619 = const()[name = string("op_2619"), val = tensor([1, 1500, 16, -1])]; tensor var_2620_cast_fp16 = reshape(shape = var_2619, x = v_117_internal_tensor_assign_1_cast_fp16)[name = string("op_2620_cast_fp16")]; tensor var_2621 = const()[name = string("op_2621"), val = tensor([0, 2, 1, 3])]; bool qk_71_transpose_x_0 = const()[name = string("qk_71_transpose_x_0"), val = bool(false)]; bool qk_71_transpose_y_0 = const()[name = string("qk_71_transpose_y_0"), val = bool(false)]; tensor transpose_239_perm_0 = const()[name = string("transpose_239_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_240_perm_0 = const()[name = string("transpose_240_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_240 = transpose(perm = transpose_240_perm_0, x = k_119_cast_fp16)[name = string("transpose_386")]; tensor transpose_239 = transpose(perm = transpose_239_perm_0, x = q_95_cast_fp16)[name = string("transpose_387")]; tensor qk_71_cast_fp16 = matmul(transpose_x = qk_71_transpose_x_0, transpose_y = qk_71_transpose_y_0, x = transpose_239, y = transpose_240)[name = string("qk_71_cast_fp16")]; tensor var_2625_cast_fp16 = softmax(axis = var_2469, x = qk_71_cast_fp16)[name = string("op_2625_cast_fp16")]; bool var_2627_transpose_x_0 = const()[name = string("op_2627_transpose_x_0"), val = bool(false)]; bool var_2627_transpose_y_0 = const()[name = string("op_2627_transpose_y_0"), val = bool(false)]; tensor v_119_cast_fp16 = transpose(perm = var_2621, x = var_2620_cast_fp16)[name = string("transpose_388")]; tensor var_2627_cast_fp16 = matmul(transpose_x = var_2627_transpose_x_0, transpose_y = var_2627_transpose_y_0, x = var_2625_cast_fp16, y = v_119_cast_fp16)[name = string("op_2627_cast_fp16")]; tensor var_2628 = const()[name = string("op_2628"), val = tensor([0, 2, 1, 3])]; tensor concat_265x = const()[name = string("concat_265x"), val = tensor([1, -1, 1024])]; tensor var_2629_cast_fp16 = transpose(perm = var_2628, x = var_2627_cast_fp16)[name = string("transpose_385")]; tensor x_211_cast_fp16 = reshape(shape = concat_265x, x = var_2629_cast_fp16)[name = string("x_211_cast_fp16")]; tensor var_2633_to_fp16 = const()[name = string("op_2633_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(444452480)))]; tensor var_2634_to_fp16 = const()[name = string("op_2634_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(446549696)))]; tensor linear_93_cast_fp16 = linear(bias = var_2634_to_fp16, weight = var_2633_to_fp16, x = x_211_cast_fp16)[name = string("linear_93_cast_fp16")]; tensor x_213_cast_fp16 = add(x = x_207_cast_fp16, y = linear_93_cast_fp16)[name = string("x_213_cast_fp16")]; tensor var_2641_axes_0 = const()[name = string("op_2641_axes_0"), val = tensor([-1])]; tensor blocks_11_mlp_ln_weight_to_fp16 = const()[name = string("blocks_11_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(446551808)))]; tensor blocks_11_mlp_ln_bias_to_fp16 = const()[name = string("blocks_11_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(446553920)))]; tensor var_2641_cast_fp16 = layer_norm(axes = var_2641_axes_0, beta = blocks_11_mlp_ln_bias_to_fp16, epsilon = var_2475_to_fp16, gamma = blocks_11_mlp_ln_weight_to_fp16, x = x_213_cast_fp16)[name = string("op_2641_cast_fp16")]; tensor var_2650_to_fp16 = const()[name = string("op_2650_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(446556032)))]; tensor var_2651_to_fp16 = const()[name = string("op_2651_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(454944704)))]; tensor linear_94_cast_fp16 = linear(bias = var_2651_to_fp16, weight = var_2650_to_fp16, x = var_2641_cast_fp16)[name = string("linear_94_cast_fp16")]; string x_217_mode_0 = const()[name = string("x_217_mode_0"), val = string("EXACT")]; tensor x_217_cast_fp16 = gelu(mode = x_217_mode_0, x = linear_94_cast_fp16)[name = string("x_217_cast_fp16")]; tensor var_2656_to_fp16 = const()[name = string("op_2656_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(454952960)))]; tensor var_2657_to_fp16 = const()[name = string("op_2657_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(463341632)))]; tensor linear_95_cast_fp16 = linear(bias = var_2657_to_fp16, weight = var_2656_to_fp16, x = x_217_cast_fp16)[name = string("linear_95_cast_fp16")]; tensor x_219_cast_fp16 = add(x = x_213_cast_fp16, y = linear_95_cast_fp16)[name = string("x_219_cast_fp16")]; tensor k_cache_49_begin_0 = const()[name = string("k_cache_49_begin_0"), val = tensor([12, 0, 0, 0])]; tensor k_cache_49_end_0 = const()[name = string("k_cache_49_end_0"), val = tensor([13, 1, 448, 1024])]; tensor k_cache_49_end_mask_0 = const()[name = string("k_cache_49_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_49_squeeze_mask_0 = const()[name = string("k_cache_49_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_49_cast_fp16 = slice_by_index(begin = k_cache_49_begin_0, end = k_cache_49_end_0, end_mask = k_cache_49_end_mask_0, squeeze_mask = k_cache_49_squeeze_mask_0, x = coreml_update_state_70)[name = string("k_cache_49_cast_fp16")]; tensor v_cache_49_begin_0 = const()[name = string("v_cache_49_begin_0"), val = tensor([12, 0, 0, 0])]; tensor v_cache_49_end_0 = const()[name = string("v_cache_49_end_0"), val = tensor([13, 1, 448, 1024])]; tensor v_cache_49_end_mask_0 = const()[name = string("v_cache_49_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_49_squeeze_mask_0 = const()[name = string("v_cache_49_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_49_cast_fp16 = slice_by_index(begin = v_cache_49_begin_0, end = v_cache_49_end_0, end_mask = v_cache_49_end_mask_0, squeeze_mask = v_cache_49_squeeze_mask_0, x = coreml_update_state_71)[name = string("v_cache_49_cast_fp16")]; tensor k_cache_51_begin_0 = const()[name = string("k_cache_51_begin_0"), val = tensor([12, 0, 0, 0])]; tensor k_cache_51_end_0 = const()[name = string("k_cache_51_end_0"), val = tensor([13, 1, 1500, 1024])]; tensor k_cache_51_end_mask_0 = const()[name = string("k_cache_51_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_51_squeeze_mask_0 = const()[name = string("k_cache_51_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_51_cast_fp16 = slice_by_index(begin = k_cache_51_begin_0, end = k_cache_51_end_0, end_mask = k_cache_51_end_mask_0, squeeze_mask = k_cache_51_squeeze_mask_0, x = read_state_2)[name = string("k_cache_51_cast_fp16")]; tensor v_cache_51_begin_0 = const()[name = string("v_cache_51_begin_0"), val = tensor([12, 0, 0, 0])]; tensor v_cache_51_end_0 = const()[name = string("v_cache_51_end_0"), val = tensor([13, 1, 1500, 1024])]; tensor v_cache_51_end_mask_0 = const()[name = string("v_cache_51_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_51_squeeze_mask_0 = const()[name = string("v_cache_51_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_51_cast_fp16 = slice_by_index(begin = v_cache_51_begin_0, end = v_cache_51_end_0, end_mask = v_cache_51_end_mask_0, squeeze_mask = v_cache_51_squeeze_mask_0, x = read_state_3)[name = string("v_cache_51_cast_fp16")]; int32 var_2680 = const()[name = string("op_2680"), val = int32(-1)]; tensor var_2698_axes_0 = const()[name = string("op_2698_axes_0"), val = tensor([-1])]; tensor blocks_12_attn_ln_weight_to_fp16 = const()[name = string("blocks_12_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(463343744)))]; tensor blocks_12_attn_ln_bias_to_fp16 = const()[name = string("blocks_12_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(463345856)))]; fp16 var_2686_to_fp16 = const()[name = string("op_2686_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_2698_cast_fp16 = layer_norm(axes = var_2698_axes_0, beta = blocks_12_attn_ln_bias_to_fp16, epsilon = var_2686_to_fp16, gamma = blocks_12_attn_ln_weight_to_fp16, x = x_219_cast_fp16)[name = string("op_2698_cast_fp16")]; tensor var_2709_to_fp16 = const()[name = string("op_2709_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(463347968)))]; tensor var_2710_to_fp16 = const()[name = string("op_2710_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(465445184)))]; tensor linear_96_cast_fp16 = linear(bias = var_2710_to_fp16, weight = var_2709_to_fp16, x = var_2698_cast_fp16)[name = string("linear_96_cast_fp16")]; tensor var_2713_to_fp16 = const()[name = string("op_2713_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(465447296)))]; tensor linear_97_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2713_to_fp16, x = var_2698_cast_fp16)[name = string("linear_97_cast_fp16")]; tensor var_2717_to_fp16 = const()[name = string("op_2717_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(467544512)))]; tensor var_2718_to_fp16 = const()[name = string("op_2718_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(469641728)))]; tensor linear_98_cast_fp16 = linear(bias = var_2718_to_fp16, weight = var_2717_to_fp16, x = var_2698_cast_fp16)[name = string("linear_98_cast_fp16")]; tensor var_2720_shape_cast_fp16 = shape(x = linear_96_cast_fp16)[name = string("op_2720_shape_cast_fp16")]; int32 gather_146_axis_0 = const()[name = string("gather_146_axis_0"), val = int32(0)]; int32 gather_146_batch_dims_0 = const()[name = string("gather_146_batch_dims_0"), val = int32(0)]; bool gather_146_validate_indices_0 = const()[name = string("gather_146_validate_indices_0"), val = bool(false)]; string var_2720_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2720_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_146_to_uint16 = const()[name = string("select_146_to_uint16"), val = uint16(1)]; tensor var_2720_shape_cast_fp16_to_uint16 = cast(dtype = var_2720_shape_cast_fp16_to_uint16_dtype_0, x = var_2720_shape_cast_fp16)[name = string("cast_270")]; uint16 gather_146_cast_uint16 = gather(axis = gather_146_axis_0, batch_dims = gather_146_batch_dims_0, indices = select_146_to_uint16, validate_indices = gather_146_validate_indices_0, x = var_2720_shape_cast_fp16_to_uint16)[name = string("gather_146_cast_uint16")]; string gather_146_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_146_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_146_cast_uint16_to_int32 = cast(dtype = gather_146_cast_uint16_to_int32_dtype_0, x = gather_146_cast_uint16)[name = string("cast_269")]; int32 end_step_27 = add(x = offset, y = gather_146_cast_uint16_to_int32)[name = string("end_step_27")]; tensor expand_dims_192 = const()[name = string("expand_dims_192"), val = tensor([0])]; tensor expand_dims_194 = const()[name = string("expand_dims_194"), val = tensor([0])]; tensor expand_dims_195_axes_0 = const()[name = string("expand_dims_195_axes_0"), val = tensor([0])]; tensor expand_dims_195 = expand_dims(axes = expand_dims_195_axes_0, x = end_step_27)[name = string("expand_dims_195")]; tensor concat_268_values0_0 = const()[name = string("concat_268_values0_0"), val = tensor([12])]; int32 concat_268_axis_0 = const()[name = string("concat_268_axis_0"), val = int32(0)]; bool concat_268_interleave_0 = const()[name = string("concat_268_interleave_0"), val = bool(false)]; tensor concat_268 = concat(axis = concat_268_axis_0, interleave = concat_268_interleave_0, values = (concat_268_values0_0, expand_dims_192, expand_dims_1, expand_dims_194))[name = string("concat_268")]; tensor concat_269_values0_0 = const()[name = string("concat_269_values0_0"), val = tensor([0])]; tensor concat_269_values1_0 = const()[name = string("concat_269_values1_0"), val = tensor([0])]; tensor concat_269_values3_0 = const()[name = string("concat_269_values3_0"), val = tensor([0])]; int32 concat_269_axis_0 = const()[name = string("concat_269_axis_0"), val = int32(0)]; bool concat_269_interleave_0 = const()[name = string("concat_269_interleave_0"), val = bool(false)]; tensor concat_269 = concat(axis = concat_269_axis_0, interleave = concat_269_interleave_0, values = (concat_269_values0_0, concat_269_values1_0, expand_dims_195, concat_269_values3_0))[name = string("concat_269")]; tensor k_cache1_internal_tensor_assign_13_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_13_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_13_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_268, begin_mask = k_cache1_internal_tensor_assign_13_begin_mask_0, end = concat_269, end_mask = k_cache1_internal_tensor_assign_13_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_13_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_13_stride_0, update = linear_97_cast_fp16, x = coreml_update_state_70)[name = string("k_cache1_internal_tensor_assign_13_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_13_cast_fp16, input = k_cache1)[name = string("coreml_update_state_72_write_state")]; tensor coreml_update_state_72 = read_state(input = k_cache1)[name = string("coreml_update_state_72")]; tensor v_cache1_internal_tensor_assign_13_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_13_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_13_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_268, begin_mask = v_cache1_internal_tensor_assign_13_begin_mask_0, end = concat_269, end_mask = v_cache1_internal_tensor_assign_13_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_13_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_13_stride_0, update = linear_98_cast_fp16, x = coreml_update_state_71)[name = string("v_cache1_internal_tensor_assign_13_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_13_cast_fp16, input = v_cache1)[name = string("coreml_update_state_73_write_state")]; tensor coreml_update_state_73 = read_state(input = v_cache1)[name = string("coreml_update_state_73")]; int32 concat_274_values0_0 = const()[name = string("concat_274_values0_0"), val = int32(1)]; int32 concat_274_values2_0 = const()[name = string("concat_274_values2_0"), val = int32(1024)]; int32 concat_274_axis_0 = const()[name = string("concat_274_axis_0"), val = int32(0)]; bool concat_274_interleave_0 = const()[name = string("concat_274_interleave_0"), val = bool(false)]; tensor concat_274 = concat(axis = concat_274_axis_0, interleave = concat_274_interleave_0, values = (concat_274_values0_0, end_step_27, concat_274_values2_0))[name = string("concat_274")]; tensor var_2736_begin_0 = const()[name = string("op_2736_begin_0"), val = tensor([0, 0, 0])]; tensor var_2736_end_mask_0 = const()[name = string("op_2736_end_mask_0"), val = tensor([true, false, true])]; tensor var_2736_cast_fp16 = slice_by_index(begin = var_2736_begin_0, end = concat_274, end_mask = var_2736_end_mask_0, x = k_cache_49_cast_fp16)[name = string("op_2736_cast_fp16")]; tensor var_2739_begin_0 = const()[name = string("op_2739_begin_0"), val = tensor([0, 0, 0])]; tensor var_2739_end_mask_0 = const()[name = string("op_2739_end_mask_0"), val = tensor([true, false, true])]; tensor var_2739_cast_fp16 = slice_by_index(begin = var_2739_begin_0, end = concat_274, end_mask = var_2739_end_mask_0, x = v_cache_49_cast_fp16)[name = string("op_2739_cast_fp16")]; tensor concat_276x = const()[name = string("concat_276x"), val = tensor([1, -1, 16, 64])]; tensor var_2749_cast_fp16 = reshape(shape = concat_276x, x = linear_96_cast_fp16)[name = string("op_2749_cast_fp16")]; tensor const_168_to_fp16 = const()[name = string("const_168_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_99_cast_fp16 = mul(x = var_2749_cast_fp16, y = const_168_to_fp16)[name = string("q_99_cast_fp16")]; tensor concat_277x = const()[name = string("concat_277x"), val = tensor([1, -1, 16, 64])]; tensor var_2756_cast_fp16 = reshape(shape = concat_277x, x = var_2736_cast_fp16)[name = string("op_2756_cast_fp16")]; tensor const_169_to_fp16 = const()[name = string("const_169_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_125_cast_fp16 = mul(x = var_2756_cast_fp16, y = const_169_to_fp16)[name = string("k_125_cast_fp16")]; tensor concat_278x = const()[name = string("concat_278x"), val = tensor([1, -1, 16, 64])]; tensor var_2763_cast_fp16 = reshape(shape = concat_278x, x = var_2739_cast_fp16)[name = string("op_2763_cast_fp16")]; tensor var_2764 = const()[name = string("op_2764"), val = tensor([0, 2, 1, 3])]; bool qk_73_transpose_x_0 = const()[name = string("qk_73_transpose_x_0"), val = bool(false)]; bool qk_73_transpose_y_0 = const()[name = string("qk_73_transpose_y_0"), val = bool(false)]; tensor transpose_241_perm_0 = const()[name = string("transpose_241_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_242_perm_0 = const()[name = string("transpose_242_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_242 = transpose(perm = transpose_242_perm_0, x = k_125_cast_fp16)[name = string("transpose_382")]; tensor transpose_241 = transpose(perm = transpose_241_perm_0, x = q_99_cast_fp16)[name = string("transpose_383")]; tensor qk_73_cast_fp16 = matmul(transpose_x = qk_73_transpose_x_0, transpose_y = qk_73_transpose_y_0, x = transpose_241, y = transpose_242)[name = string("qk_73_cast_fp16")]; int32 concat_279_values1_0 = const()[name = string("concat_279_values1_0"), val = int32(448)]; int32 concat_279_axis_0 = const()[name = string("concat_279_axis_0"), val = int32(0)]; bool concat_279_interleave_0 = const()[name = string("concat_279_interleave_0"), val = bool(false)]; tensor concat_279 = concat(axis = concat_279_axis_0, interleave = concat_279_interleave_0, values = (gather_146_cast_uint16_to_int32, concat_279_values1_0))[name = string("concat_279")]; tensor var_2767_begin_0 = const()[name = string("op_2767_begin_0"), val = tensor([0, 0])]; tensor var_2767_end_mask_0 = const()[name = string("op_2767_end_mask_0"), val = tensor([false, true])]; tensor var_2767_cast_fp16 = slice_by_index(begin = var_2767_begin_0, end = concat_279, end_mask = var_2767_end_mask_0, x = mask_to_fp16)[name = string("op_2767_cast_fp16")]; int32 concat_280_values0_0 = const()[name = string("concat_280_values0_0"), val = int32(0)]; int32 concat_280_axis_0 = const()[name = string("concat_280_axis_0"), val = int32(0)]; bool concat_280_interleave_0 = const()[name = string("concat_280_interleave_0"), val = bool(false)]; tensor concat_280 = concat(axis = concat_280_axis_0, interleave = concat_280_interleave_0, values = (concat_280_values0_0, gather_146_cast_uint16_to_int32))[name = string("concat_280")]; tensor var_2768_begin_0 = const()[name = string("op_2768_begin_0"), val = tensor([0, 0])]; tensor var_2768_end_mask_0 = const()[name = string("op_2768_end_mask_0"), val = tensor([true, false])]; tensor var_2768_cast_fp16 = slice_by_index(begin = var_2768_begin_0, end = concat_280, end_mask = var_2768_end_mask_0, x = var_2767_cast_fp16)[name = string("op_2768_cast_fp16")]; tensor qk_75_cast_fp16 = add(x = qk_73_cast_fp16, y = var_2768_cast_fp16)[name = string("qk_75_cast_fp16")]; tensor var_2771_cast_fp16 = softmax(axis = var_2680, x = qk_75_cast_fp16)[name = string("op_2771_cast_fp16")]; bool var_2773_transpose_x_0 = const()[name = string("op_2773_transpose_x_0"), val = bool(false)]; bool var_2773_transpose_y_0 = const()[name = string("op_2773_transpose_y_0"), val = bool(false)]; tensor v_125_cast_fp16 = transpose(perm = var_2764, x = var_2763_cast_fp16)[name = string("transpose_384")]; tensor var_2773_cast_fp16 = matmul(transpose_x = var_2773_transpose_x_0, transpose_y = var_2773_transpose_y_0, x = var_2771_cast_fp16, y = v_125_cast_fp16)[name = string("op_2773_cast_fp16")]; tensor var_2774 = const()[name = string("op_2774"), val = tensor([0, 2, 1, 3])]; tensor concat_281x = const()[name = string("concat_281x"), val = tensor([1, -1, 1024])]; tensor var_2775_cast_fp16 = transpose(perm = var_2774, x = var_2773_cast_fp16)[name = string("transpose_381")]; tensor x_223_cast_fp16 = reshape(shape = concat_281x, x = var_2775_cast_fp16)[name = string("x_223_cast_fp16")]; tensor var_2779_to_fp16 = const()[name = string("op_2779_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(469643840)))]; tensor var_2780_to_fp16 = const()[name = string("op_2780_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(471741056)))]; tensor linear_99_cast_fp16 = linear(bias = var_2780_to_fp16, weight = var_2779_to_fp16, x = x_223_cast_fp16)[name = string("linear_99_cast_fp16")]; tensor x_225_cast_fp16 = add(x = x_219_cast_fp16, y = linear_99_cast_fp16)[name = string("x_225_cast_fp16")]; tensor var_2787_axes_0 = const()[name = string("op_2787_axes_0"), val = tensor([-1])]; tensor blocks_12_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_12_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(471743168)))]; tensor blocks_12_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_12_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(471745280)))]; tensor var_2787_cast_fp16 = layer_norm(axes = var_2787_axes_0, beta = blocks_12_cross_attn_ln_bias_to_fp16, epsilon = var_2686_to_fp16, gamma = blocks_12_cross_attn_ln_weight_to_fp16, x = x_225_cast_fp16)[name = string("op_2787_cast_fp16")]; tensor var_2796_to_fp16 = const()[name = string("op_2796_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(471747392)))]; tensor var_2797_to_fp16 = const()[name = string("op_2797_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(473844608)))]; tensor linear_100_cast_fp16 = linear(bias = var_2797_to_fp16, weight = var_2796_to_fp16, x = var_2787_cast_fp16)[name = string("linear_100_cast_fp16")]; tensor concat_282 = const()[name = string("concat_282"), val = tensor([0, 0, 0])]; tensor concat_283 = const()[name = string("concat_283"), val = tensor([0, 1500, 0])]; tensor k_127_internal_tensor_assign_1_stride_0 = const()[name = string("k_127_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_127_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_127_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_127_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_127_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_127_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_127_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_127_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_282, begin_mask = k_127_internal_tensor_assign_1_begin_mask_0, end = concat_283, end_mask = k_127_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_127_internal_tensor_assign_1_squeeze_mask_0, stride = k_127_internal_tensor_assign_1_stride_0, update = k_cache_51_cast_fp16, x = k_7_to_fp16)[name = string("k_127_internal_tensor_assign_1_cast_fp16")]; tensor concat_284 = const()[name = string("concat_284"), val = tensor([0, 0, 0])]; tensor concat_285 = const()[name = string("concat_285"), val = tensor([0, 1500, 0])]; tensor v_127_internal_tensor_assign_1_stride_0 = const()[name = string("v_127_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_127_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_127_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_127_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_127_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_127_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_127_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_127_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_284, begin_mask = v_127_internal_tensor_assign_1_begin_mask_0, end = concat_285, end_mask = v_127_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_127_internal_tensor_assign_1_squeeze_mask_0, stride = v_127_internal_tensor_assign_1_stride_0, update = v_cache_51_cast_fp16, x = k_7_to_fp16)[name = string("v_127_internal_tensor_assign_1_cast_fp16")]; tensor concat_286x = const()[name = string("concat_286x"), val = tensor([1, -1, 16, 64])]; tensor var_2817_cast_fp16 = reshape(shape = concat_286x, x = linear_100_cast_fp16)[name = string("op_2817_cast_fp16")]; tensor const_170_to_fp16 = const()[name = string("const_170_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_103_cast_fp16 = mul(x = var_2817_cast_fp16, y = const_170_to_fp16)[name = string("q_103_cast_fp16")]; tensor var_2823 = const()[name = string("op_2823"), val = tensor([1, 1500, 16, -1])]; tensor var_2824_cast_fp16 = reshape(shape = var_2823, x = k_127_internal_tensor_assign_1_cast_fp16)[name = string("op_2824_cast_fp16")]; tensor const_171_to_fp16 = const()[name = string("const_171_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_129_cast_fp16 = mul(x = var_2824_cast_fp16, y = const_171_to_fp16)[name = string("k_129_cast_fp16")]; tensor var_2830 = const()[name = string("op_2830"), val = tensor([1, 1500, 16, -1])]; tensor var_2831_cast_fp16 = reshape(shape = var_2830, x = v_127_internal_tensor_assign_1_cast_fp16)[name = string("op_2831_cast_fp16")]; tensor var_2832 = const()[name = string("op_2832"), val = tensor([0, 2, 1, 3])]; bool qk_77_transpose_x_0 = const()[name = string("qk_77_transpose_x_0"), val = bool(false)]; bool qk_77_transpose_y_0 = const()[name = string("qk_77_transpose_y_0"), val = bool(false)]; tensor transpose_243_perm_0 = const()[name = string("transpose_243_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_244_perm_0 = const()[name = string("transpose_244_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_244 = transpose(perm = transpose_244_perm_0, x = k_129_cast_fp16)[name = string("transpose_378")]; tensor transpose_243 = transpose(perm = transpose_243_perm_0, x = q_103_cast_fp16)[name = string("transpose_379")]; tensor qk_77_cast_fp16 = matmul(transpose_x = qk_77_transpose_x_0, transpose_y = qk_77_transpose_y_0, x = transpose_243, y = transpose_244)[name = string("qk_77_cast_fp16")]; tensor var_2836_cast_fp16 = softmax(axis = var_2680, x = qk_77_cast_fp16)[name = string("op_2836_cast_fp16")]; bool var_2838_transpose_x_0 = const()[name = string("op_2838_transpose_x_0"), val = bool(false)]; bool var_2838_transpose_y_0 = const()[name = string("op_2838_transpose_y_0"), val = bool(false)]; tensor v_129_cast_fp16 = transpose(perm = var_2832, x = var_2831_cast_fp16)[name = string("transpose_380")]; tensor var_2838_cast_fp16 = matmul(transpose_x = var_2838_transpose_x_0, transpose_y = var_2838_transpose_y_0, x = var_2836_cast_fp16, y = v_129_cast_fp16)[name = string("op_2838_cast_fp16")]; tensor var_2839 = const()[name = string("op_2839"), val = tensor([0, 2, 1, 3])]; tensor concat_287x = const()[name = string("concat_287x"), val = tensor([1, -1, 1024])]; tensor var_2840_cast_fp16 = transpose(perm = var_2839, x = var_2838_cast_fp16)[name = string("transpose_377")]; tensor x_229_cast_fp16 = reshape(shape = concat_287x, x = var_2840_cast_fp16)[name = string("x_229_cast_fp16")]; tensor var_2844_to_fp16 = const()[name = string("op_2844_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(473846720)))]; tensor var_2845_to_fp16 = const()[name = string("op_2845_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(475943936)))]; tensor linear_101_cast_fp16 = linear(bias = var_2845_to_fp16, weight = var_2844_to_fp16, x = x_229_cast_fp16)[name = string("linear_101_cast_fp16")]; tensor x_231_cast_fp16 = add(x = x_225_cast_fp16, y = linear_101_cast_fp16)[name = string("x_231_cast_fp16")]; tensor var_2852_axes_0 = const()[name = string("op_2852_axes_0"), val = tensor([-1])]; tensor blocks_12_mlp_ln_weight_to_fp16 = const()[name = string("blocks_12_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(475946048)))]; tensor blocks_12_mlp_ln_bias_to_fp16 = const()[name = string("blocks_12_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(475948160)))]; tensor var_2852_cast_fp16 = layer_norm(axes = var_2852_axes_0, beta = blocks_12_mlp_ln_bias_to_fp16, epsilon = var_2686_to_fp16, gamma = blocks_12_mlp_ln_weight_to_fp16, x = x_231_cast_fp16)[name = string("op_2852_cast_fp16")]; tensor var_2861_to_fp16 = const()[name = string("op_2861_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(475950272)))]; tensor var_2862_to_fp16 = const()[name = string("op_2862_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(484338944)))]; tensor linear_102_cast_fp16 = linear(bias = var_2862_to_fp16, weight = var_2861_to_fp16, x = var_2852_cast_fp16)[name = string("linear_102_cast_fp16")]; string x_235_mode_0 = const()[name = string("x_235_mode_0"), val = string("EXACT")]; tensor x_235_cast_fp16 = gelu(mode = x_235_mode_0, x = linear_102_cast_fp16)[name = string("x_235_cast_fp16")]; tensor var_2867_to_fp16 = const()[name = string("op_2867_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(484347200)))]; tensor var_2868_to_fp16 = const()[name = string("op_2868_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(492735872)))]; tensor linear_103_cast_fp16 = linear(bias = var_2868_to_fp16, weight = var_2867_to_fp16, x = x_235_cast_fp16)[name = string("linear_103_cast_fp16")]; tensor x_237_cast_fp16 = add(x = x_231_cast_fp16, y = linear_103_cast_fp16)[name = string("x_237_cast_fp16")]; tensor k_cache_53_begin_0 = const()[name = string("k_cache_53_begin_0"), val = tensor([13, 0, 0, 0])]; tensor k_cache_53_end_0 = const()[name = string("k_cache_53_end_0"), val = tensor([14, 1, 448, 1024])]; tensor k_cache_53_end_mask_0 = const()[name = string("k_cache_53_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_53_squeeze_mask_0 = const()[name = string("k_cache_53_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_53_cast_fp16 = slice_by_index(begin = k_cache_53_begin_0, end = k_cache_53_end_0, end_mask = k_cache_53_end_mask_0, squeeze_mask = k_cache_53_squeeze_mask_0, x = coreml_update_state_72)[name = string("k_cache_53_cast_fp16")]; tensor v_cache_53_begin_0 = const()[name = string("v_cache_53_begin_0"), val = tensor([13, 0, 0, 0])]; tensor v_cache_53_end_0 = const()[name = string("v_cache_53_end_0"), val = tensor([14, 1, 448, 1024])]; tensor v_cache_53_end_mask_0 = const()[name = string("v_cache_53_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_53_squeeze_mask_0 = const()[name = string("v_cache_53_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_53_cast_fp16 = slice_by_index(begin = v_cache_53_begin_0, end = v_cache_53_end_0, end_mask = v_cache_53_end_mask_0, squeeze_mask = v_cache_53_squeeze_mask_0, x = coreml_update_state_73)[name = string("v_cache_53_cast_fp16")]; tensor k_cache_55_begin_0 = const()[name = string("k_cache_55_begin_0"), val = tensor([13, 0, 0, 0])]; tensor k_cache_55_end_0 = const()[name = string("k_cache_55_end_0"), val = tensor([14, 1, 1500, 1024])]; tensor k_cache_55_end_mask_0 = const()[name = string("k_cache_55_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_55_squeeze_mask_0 = const()[name = string("k_cache_55_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_55_cast_fp16 = slice_by_index(begin = k_cache_55_begin_0, end = k_cache_55_end_0, end_mask = k_cache_55_end_mask_0, squeeze_mask = k_cache_55_squeeze_mask_0, x = read_state_2)[name = string("k_cache_55_cast_fp16")]; tensor v_cache_55_begin_0 = const()[name = string("v_cache_55_begin_0"), val = tensor([13, 0, 0, 0])]; tensor v_cache_55_end_0 = const()[name = string("v_cache_55_end_0"), val = tensor([14, 1, 1500, 1024])]; tensor v_cache_55_end_mask_0 = const()[name = string("v_cache_55_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_55_squeeze_mask_0 = const()[name = string("v_cache_55_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_55_cast_fp16 = slice_by_index(begin = v_cache_55_begin_0, end = v_cache_55_end_0, end_mask = v_cache_55_end_mask_0, squeeze_mask = v_cache_55_squeeze_mask_0, x = read_state_3)[name = string("v_cache_55_cast_fp16")]; int32 var_2891 = const()[name = string("op_2891"), val = int32(-1)]; tensor var_2909_axes_0 = const()[name = string("op_2909_axes_0"), val = tensor([-1])]; tensor blocks_13_attn_ln_weight_to_fp16 = const()[name = string("blocks_13_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(492737984)))]; tensor blocks_13_attn_ln_bias_to_fp16 = const()[name = string("blocks_13_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(492740096)))]; fp16 var_2897_to_fp16 = const()[name = string("op_2897_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_2909_cast_fp16 = layer_norm(axes = var_2909_axes_0, beta = blocks_13_attn_ln_bias_to_fp16, epsilon = var_2897_to_fp16, gamma = blocks_13_attn_ln_weight_to_fp16, x = x_237_cast_fp16)[name = string("op_2909_cast_fp16")]; tensor var_2920_to_fp16 = const()[name = string("op_2920_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(492742208)))]; tensor var_2921_to_fp16 = const()[name = string("op_2921_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(494839424)))]; tensor linear_104_cast_fp16 = linear(bias = var_2921_to_fp16, weight = var_2920_to_fp16, x = var_2909_cast_fp16)[name = string("linear_104_cast_fp16")]; tensor var_2924_to_fp16 = const()[name = string("op_2924_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(494841536)))]; tensor linear_105_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2924_to_fp16, x = var_2909_cast_fp16)[name = string("linear_105_cast_fp16")]; tensor var_2928_to_fp16 = const()[name = string("op_2928_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(496938752)))]; tensor var_2929_to_fp16 = const()[name = string("op_2929_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(499035968)))]; tensor linear_106_cast_fp16 = linear(bias = var_2929_to_fp16, weight = var_2928_to_fp16, x = var_2909_cast_fp16)[name = string("linear_106_cast_fp16")]; tensor var_2931_shape_cast_fp16 = shape(x = linear_104_cast_fp16)[name = string("op_2931_shape_cast_fp16")]; int32 gather_158_axis_0 = const()[name = string("gather_158_axis_0"), val = int32(0)]; int32 gather_158_batch_dims_0 = const()[name = string("gather_158_batch_dims_0"), val = int32(0)]; bool gather_158_validate_indices_0 = const()[name = string("gather_158_validate_indices_0"), val = bool(false)]; string var_2931_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2931_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_158_to_uint16 = const()[name = string("select_158_to_uint16"), val = uint16(1)]; tensor var_2931_shape_cast_fp16_to_uint16 = cast(dtype = var_2931_shape_cast_fp16_to_uint16_dtype_0, x = var_2931_shape_cast_fp16)[name = string("cast_268")]; uint16 gather_158_cast_uint16 = gather(axis = gather_158_axis_0, batch_dims = gather_158_batch_dims_0, indices = select_158_to_uint16, validate_indices = gather_158_validate_indices_0, x = var_2931_shape_cast_fp16_to_uint16)[name = string("gather_158_cast_uint16")]; string gather_158_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_158_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_158_cast_uint16_to_int32 = cast(dtype = gather_158_cast_uint16_to_int32_dtype_0, x = gather_158_cast_uint16)[name = string("cast_267")]; int32 end_step_29 = add(x = offset, y = gather_158_cast_uint16_to_int32)[name = string("end_step_29")]; tensor expand_dims_208 = const()[name = string("expand_dims_208"), val = tensor([0])]; tensor expand_dims_210 = const()[name = string("expand_dims_210"), val = tensor([0])]; tensor expand_dims_211_axes_0 = const()[name = string("expand_dims_211_axes_0"), val = tensor([0])]; tensor expand_dims_211 = expand_dims(axes = expand_dims_211_axes_0, x = end_step_29)[name = string("expand_dims_211")]; tensor concat_290_values0_0 = const()[name = string("concat_290_values0_0"), val = tensor([13])]; int32 concat_290_axis_0 = const()[name = string("concat_290_axis_0"), val = int32(0)]; bool concat_290_interleave_0 = const()[name = string("concat_290_interleave_0"), val = bool(false)]; tensor concat_290 = concat(axis = concat_290_axis_0, interleave = concat_290_interleave_0, values = (concat_290_values0_0, expand_dims_208, expand_dims_1, expand_dims_210))[name = string("concat_290")]; tensor concat_291_values0_0 = const()[name = string("concat_291_values0_0"), val = tensor([0])]; tensor concat_291_values1_0 = const()[name = string("concat_291_values1_0"), val = tensor([0])]; tensor concat_291_values3_0 = const()[name = string("concat_291_values3_0"), val = tensor([0])]; int32 concat_291_axis_0 = const()[name = string("concat_291_axis_0"), val = int32(0)]; bool concat_291_interleave_0 = const()[name = string("concat_291_interleave_0"), val = bool(false)]; tensor concat_291 = concat(axis = concat_291_axis_0, interleave = concat_291_interleave_0, values = (concat_291_values0_0, concat_291_values1_0, expand_dims_211, concat_291_values3_0))[name = string("concat_291")]; tensor k_cache1_internal_tensor_assign_14_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_14_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_14_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_290, begin_mask = k_cache1_internal_tensor_assign_14_begin_mask_0, end = concat_291, end_mask = k_cache1_internal_tensor_assign_14_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_14_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_14_stride_0, update = linear_105_cast_fp16, x = coreml_update_state_72)[name = string("k_cache1_internal_tensor_assign_14_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_14_cast_fp16, input = k_cache1)[name = string("coreml_update_state_74_write_state")]; tensor coreml_update_state_74 = read_state(input = k_cache1)[name = string("coreml_update_state_74")]; tensor v_cache1_internal_tensor_assign_14_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_14_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_14_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_290, begin_mask = v_cache1_internal_tensor_assign_14_begin_mask_0, end = concat_291, end_mask = v_cache1_internal_tensor_assign_14_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_14_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_14_stride_0, update = linear_106_cast_fp16, x = coreml_update_state_73)[name = string("v_cache1_internal_tensor_assign_14_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_14_cast_fp16, input = v_cache1)[name = string("coreml_update_state_75_write_state")]; tensor coreml_update_state_75 = read_state(input = v_cache1)[name = string("coreml_update_state_75")]; int32 concat_296_values0_0 = const()[name = string("concat_296_values0_0"), val = int32(1)]; int32 concat_296_values2_0 = const()[name = string("concat_296_values2_0"), val = int32(1024)]; int32 concat_296_axis_0 = const()[name = string("concat_296_axis_0"), val = int32(0)]; bool concat_296_interleave_0 = const()[name = string("concat_296_interleave_0"), val = bool(false)]; tensor concat_296 = concat(axis = concat_296_axis_0, interleave = concat_296_interleave_0, values = (concat_296_values0_0, end_step_29, concat_296_values2_0))[name = string("concat_296")]; tensor var_2947_begin_0 = const()[name = string("op_2947_begin_0"), val = tensor([0, 0, 0])]; tensor var_2947_end_mask_0 = const()[name = string("op_2947_end_mask_0"), val = tensor([true, false, true])]; tensor var_2947_cast_fp16 = slice_by_index(begin = var_2947_begin_0, end = concat_296, end_mask = var_2947_end_mask_0, x = k_cache_53_cast_fp16)[name = string("op_2947_cast_fp16")]; tensor var_2950_begin_0 = const()[name = string("op_2950_begin_0"), val = tensor([0, 0, 0])]; tensor var_2950_end_mask_0 = const()[name = string("op_2950_end_mask_0"), val = tensor([true, false, true])]; tensor var_2950_cast_fp16 = slice_by_index(begin = var_2950_begin_0, end = concat_296, end_mask = var_2950_end_mask_0, x = v_cache_53_cast_fp16)[name = string("op_2950_cast_fp16")]; tensor concat_298x = const()[name = string("concat_298x"), val = tensor([1, -1, 16, 64])]; tensor var_2960_cast_fp16 = reshape(shape = concat_298x, x = linear_104_cast_fp16)[name = string("op_2960_cast_fp16")]; tensor const_172_to_fp16 = const()[name = string("const_172_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_107_cast_fp16 = mul(x = var_2960_cast_fp16, y = const_172_to_fp16)[name = string("q_107_cast_fp16")]; tensor concat_299x = const()[name = string("concat_299x"), val = tensor([1, -1, 16, 64])]; tensor var_2967_cast_fp16 = reshape(shape = concat_299x, x = var_2947_cast_fp16)[name = string("op_2967_cast_fp16")]; tensor const_173_to_fp16 = const()[name = string("const_173_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_135_cast_fp16 = mul(x = var_2967_cast_fp16, y = const_173_to_fp16)[name = string("k_135_cast_fp16")]; tensor concat_300x = const()[name = string("concat_300x"), val = tensor([1, -1, 16, 64])]; tensor var_2974_cast_fp16 = reshape(shape = concat_300x, x = var_2950_cast_fp16)[name = string("op_2974_cast_fp16")]; tensor var_2975 = const()[name = string("op_2975"), val = tensor([0, 2, 1, 3])]; bool qk_79_transpose_x_0 = const()[name = string("qk_79_transpose_x_0"), val = bool(false)]; bool qk_79_transpose_y_0 = const()[name = string("qk_79_transpose_y_0"), val = bool(false)]; tensor transpose_245_perm_0 = const()[name = string("transpose_245_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_246_perm_0 = const()[name = string("transpose_246_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_246 = transpose(perm = transpose_246_perm_0, x = k_135_cast_fp16)[name = string("transpose_374")]; tensor transpose_245 = transpose(perm = transpose_245_perm_0, x = q_107_cast_fp16)[name = string("transpose_375")]; tensor qk_79_cast_fp16 = matmul(transpose_x = qk_79_transpose_x_0, transpose_y = qk_79_transpose_y_0, x = transpose_245, y = transpose_246)[name = string("qk_79_cast_fp16")]; int32 concat_301_values1_0 = const()[name = string("concat_301_values1_0"), val = int32(448)]; int32 concat_301_axis_0 = const()[name = string("concat_301_axis_0"), val = int32(0)]; bool concat_301_interleave_0 = const()[name = string("concat_301_interleave_0"), val = bool(false)]; tensor concat_301 = concat(axis = concat_301_axis_0, interleave = concat_301_interleave_0, values = (gather_158_cast_uint16_to_int32, concat_301_values1_0))[name = string("concat_301")]; tensor var_2978_begin_0 = const()[name = string("op_2978_begin_0"), val = tensor([0, 0])]; tensor var_2978_end_mask_0 = const()[name = string("op_2978_end_mask_0"), val = tensor([false, true])]; tensor var_2978_cast_fp16 = slice_by_index(begin = var_2978_begin_0, end = concat_301, end_mask = var_2978_end_mask_0, x = mask_to_fp16)[name = string("op_2978_cast_fp16")]; int32 concat_302_values0_0 = const()[name = string("concat_302_values0_0"), val = int32(0)]; int32 concat_302_axis_0 = const()[name = string("concat_302_axis_0"), val = int32(0)]; bool concat_302_interleave_0 = const()[name = string("concat_302_interleave_0"), val = bool(false)]; tensor concat_302 = concat(axis = concat_302_axis_0, interleave = concat_302_interleave_0, values = (concat_302_values0_0, gather_158_cast_uint16_to_int32))[name = string("concat_302")]; tensor var_2979_begin_0 = const()[name = string("op_2979_begin_0"), val = tensor([0, 0])]; tensor var_2979_end_mask_0 = const()[name = string("op_2979_end_mask_0"), val = tensor([true, false])]; tensor var_2979_cast_fp16 = slice_by_index(begin = var_2979_begin_0, end = concat_302, end_mask = var_2979_end_mask_0, x = var_2978_cast_fp16)[name = string("op_2979_cast_fp16")]; tensor qk_81_cast_fp16 = add(x = qk_79_cast_fp16, y = var_2979_cast_fp16)[name = string("qk_81_cast_fp16")]; tensor var_2982_cast_fp16 = softmax(axis = var_2891, x = qk_81_cast_fp16)[name = string("op_2982_cast_fp16")]; bool var_2984_transpose_x_0 = const()[name = string("op_2984_transpose_x_0"), val = bool(false)]; bool var_2984_transpose_y_0 = const()[name = string("op_2984_transpose_y_0"), val = bool(false)]; tensor v_135_cast_fp16 = transpose(perm = var_2975, x = var_2974_cast_fp16)[name = string("transpose_376")]; tensor var_2984_cast_fp16 = matmul(transpose_x = var_2984_transpose_x_0, transpose_y = var_2984_transpose_y_0, x = var_2982_cast_fp16, y = v_135_cast_fp16)[name = string("op_2984_cast_fp16")]; tensor var_2985 = const()[name = string("op_2985"), val = tensor([0, 2, 1, 3])]; tensor concat_303x = const()[name = string("concat_303x"), val = tensor([1, -1, 1024])]; tensor var_2986_cast_fp16 = transpose(perm = var_2985, x = var_2984_cast_fp16)[name = string("transpose_373")]; tensor x_241_cast_fp16 = reshape(shape = concat_303x, x = var_2986_cast_fp16)[name = string("x_241_cast_fp16")]; tensor var_2990_to_fp16 = const()[name = string("op_2990_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(499038080)))]; tensor var_2991_to_fp16 = const()[name = string("op_2991_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(501135296)))]; tensor linear_107_cast_fp16 = linear(bias = var_2991_to_fp16, weight = var_2990_to_fp16, x = x_241_cast_fp16)[name = string("linear_107_cast_fp16")]; tensor x_243_cast_fp16 = add(x = x_237_cast_fp16, y = linear_107_cast_fp16)[name = string("x_243_cast_fp16")]; tensor var_2998_axes_0 = const()[name = string("op_2998_axes_0"), val = tensor([-1])]; tensor blocks_13_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_13_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(501137408)))]; tensor blocks_13_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_13_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(501139520)))]; tensor var_2998_cast_fp16 = layer_norm(axes = var_2998_axes_0, beta = blocks_13_cross_attn_ln_bias_to_fp16, epsilon = var_2897_to_fp16, gamma = blocks_13_cross_attn_ln_weight_to_fp16, x = x_243_cast_fp16)[name = string("op_2998_cast_fp16")]; tensor var_3007_to_fp16 = const()[name = string("op_3007_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(501141632)))]; tensor var_3008_to_fp16 = const()[name = string("op_3008_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(503238848)))]; tensor linear_108_cast_fp16 = linear(bias = var_3008_to_fp16, weight = var_3007_to_fp16, x = var_2998_cast_fp16)[name = string("linear_108_cast_fp16")]; tensor concat_304 = const()[name = string("concat_304"), val = tensor([0, 0, 0])]; tensor concat_305 = const()[name = string("concat_305"), val = tensor([0, 1500, 0])]; tensor k_137_internal_tensor_assign_1_stride_0 = const()[name = string("k_137_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_137_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_137_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_137_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_137_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_137_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_137_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_137_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_304, begin_mask = k_137_internal_tensor_assign_1_begin_mask_0, end = concat_305, end_mask = k_137_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_137_internal_tensor_assign_1_squeeze_mask_0, stride = k_137_internal_tensor_assign_1_stride_0, update = k_cache_55_cast_fp16, x = k_7_to_fp16)[name = string("k_137_internal_tensor_assign_1_cast_fp16")]; tensor concat_306 = const()[name = string("concat_306"), val = tensor([0, 0, 0])]; tensor concat_307 = const()[name = string("concat_307"), val = tensor([0, 1500, 0])]; tensor v_137_internal_tensor_assign_1_stride_0 = const()[name = string("v_137_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_137_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_137_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_137_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_137_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_137_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_137_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_137_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_306, begin_mask = v_137_internal_tensor_assign_1_begin_mask_0, end = concat_307, end_mask = v_137_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_137_internal_tensor_assign_1_squeeze_mask_0, stride = v_137_internal_tensor_assign_1_stride_0, update = v_cache_55_cast_fp16, x = k_7_to_fp16)[name = string("v_137_internal_tensor_assign_1_cast_fp16")]; tensor concat_308x = const()[name = string("concat_308x"), val = tensor([1, -1, 16, 64])]; tensor var_3028_cast_fp16 = reshape(shape = concat_308x, x = linear_108_cast_fp16)[name = string("op_3028_cast_fp16")]; tensor const_174_to_fp16 = const()[name = string("const_174_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_111_cast_fp16 = mul(x = var_3028_cast_fp16, y = const_174_to_fp16)[name = string("q_111_cast_fp16")]; tensor var_3034 = const()[name = string("op_3034"), val = tensor([1, 1500, 16, -1])]; tensor var_3035_cast_fp16 = reshape(shape = var_3034, x = k_137_internal_tensor_assign_1_cast_fp16)[name = string("op_3035_cast_fp16")]; tensor const_175_to_fp16 = const()[name = string("const_175_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_139_cast_fp16 = mul(x = var_3035_cast_fp16, y = const_175_to_fp16)[name = string("k_139_cast_fp16")]; tensor var_3041 = const()[name = string("op_3041"), val = tensor([1, 1500, 16, -1])]; tensor var_3042_cast_fp16 = reshape(shape = var_3041, x = v_137_internal_tensor_assign_1_cast_fp16)[name = string("op_3042_cast_fp16")]; tensor var_3043 = const()[name = string("op_3043"), val = tensor([0, 2, 1, 3])]; bool qk_83_transpose_x_0 = const()[name = string("qk_83_transpose_x_0"), val = bool(false)]; bool qk_83_transpose_y_0 = const()[name = string("qk_83_transpose_y_0"), val = bool(false)]; tensor transpose_247_perm_0 = const()[name = string("transpose_247_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_248_perm_0 = const()[name = string("transpose_248_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_248 = transpose(perm = transpose_248_perm_0, x = k_139_cast_fp16)[name = string("transpose_370")]; tensor transpose_247 = transpose(perm = transpose_247_perm_0, x = q_111_cast_fp16)[name = string("transpose_371")]; tensor qk_83_cast_fp16 = matmul(transpose_x = qk_83_transpose_x_0, transpose_y = qk_83_transpose_y_0, x = transpose_247, y = transpose_248)[name = string("qk_83_cast_fp16")]; tensor var_3047_cast_fp16 = softmax(axis = var_2891, x = qk_83_cast_fp16)[name = string("op_3047_cast_fp16")]; bool var_3049_transpose_x_0 = const()[name = string("op_3049_transpose_x_0"), val = bool(false)]; bool var_3049_transpose_y_0 = const()[name = string("op_3049_transpose_y_0"), val = bool(false)]; tensor v_139_cast_fp16 = transpose(perm = var_3043, x = var_3042_cast_fp16)[name = string("transpose_372")]; tensor var_3049_cast_fp16 = matmul(transpose_x = var_3049_transpose_x_0, transpose_y = var_3049_transpose_y_0, x = var_3047_cast_fp16, y = v_139_cast_fp16)[name = string("op_3049_cast_fp16")]; tensor var_3050 = const()[name = string("op_3050"), val = tensor([0, 2, 1, 3])]; tensor concat_309x = const()[name = string("concat_309x"), val = tensor([1, -1, 1024])]; tensor var_3051_cast_fp16 = transpose(perm = var_3050, x = var_3049_cast_fp16)[name = string("transpose_369")]; tensor x_247_cast_fp16 = reshape(shape = concat_309x, x = var_3051_cast_fp16)[name = string("x_247_cast_fp16")]; tensor var_3055_to_fp16 = const()[name = string("op_3055_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(503240960)))]; tensor var_3056_to_fp16 = const()[name = string("op_3056_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505338176)))]; tensor linear_109_cast_fp16 = linear(bias = var_3056_to_fp16, weight = var_3055_to_fp16, x = x_247_cast_fp16)[name = string("linear_109_cast_fp16")]; tensor x_249_cast_fp16 = add(x = x_243_cast_fp16, y = linear_109_cast_fp16)[name = string("x_249_cast_fp16")]; tensor var_3063_axes_0 = const()[name = string("op_3063_axes_0"), val = tensor([-1])]; tensor blocks_13_mlp_ln_weight_to_fp16 = const()[name = string("blocks_13_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505340288)))]; tensor blocks_13_mlp_ln_bias_to_fp16 = const()[name = string("blocks_13_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505342400)))]; tensor var_3063_cast_fp16 = layer_norm(axes = var_3063_axes_0, beta = blocks_13_mlp_ln_bias_to_fp16, epsilon = var_2897_to_fp16, gamma = blocks_13_mlp_ln_weight_to_fp16, x = x_249_cast_fp16)[name = string("op_3063_cast_fp16")]; tensor var_3072_to_fp16 = const()[name = string("op_3072_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505344512)))]; tensor var_3073_to_fp16 = const()[name = string("op_3073_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(513733184)))]; tensor linear_110_cast_fp16 = linear(bias = var_3073_to_fp16, weight = var_3072_to_fp16, x = var_3063_cast_fp16)[name = string("linear_110_cast_fp16")]; string x_253_mode_0 = const()[name = string("x_253_mode_0"), val = string("EXACT")]; tensor x_253_cast_fp16 = gelu(mode = x_253_mode_0, x = linear_110_cast_fp16)[name = string("x_253_cast_fp16")]; tensor var_3078_to_fp16 = const()[name = string("op_3078_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(513741440)))]; tensor var_3079_to_fp16 = const()[name = string("op_3079_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(522130112)))]; tensor linear_111_cast_fp16 = linear(bias = var_3079_to_fp16, weight = var_3078_to_fp16, x = x_253_cast_fp16)[name = string("linear_111_cast_fp16")]; tensor x_255_cast_fp16 = add(x = x_249_cast_fp16, y = linear_111_cast_fp16)[name = string("x_255_cast_fp16")]; tensor k_cache_57_begin_0 = const()[name = string("k_cache_57_begin_0"), val = tensor([14, 0, 0, 0])]; tensor k_cache_57_end_0 = const()[name = string("k_cache_57_end_0"), val = tensor([15, 1, 448, 1024])]; tensor k_cache_57_end_mask_0 = const()[name = string("k_cache_57_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_57_squeeze_mask_0 = const()[name = string("k_cache_57_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_57_cast_fp16 = slice_by_index(begin = k_cache_57_begin_0, end = k_cache_57_end_0, end_mask = k_cache_57_end_mask_0, squeeze_mask = k_cache_57_squeeze_mask_0, x = coreml_update_state_74)[name = string("k_cache_57_cast_fp16")]; tensor v_cache_57_begin_0 = const()[name = string("v_cache_57_begin_0"), val = tensor([14, 0, 0, 0])]; tensor v_cache_57_end_0 = const()[name = string("v_cache_57_end_0"), val = tensor([15, 1, 448, 1024])]; tensor v_cache_57_end_mask_0 = const()[name = string("v_cache_57_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_57_squeeze_mask_0 = const()[name = string("v_cache_57_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_57_cast_fp16 = slice_by_index(begin = v_cache_57_begin_0, end = v_cache_57_end_0, end_mask = v_cache_57_end_mask_0, squeeze_mask = v_cache_57_squeeze_mask_0, x = coreml_update_state_75)[name = string("v_cache_57_cast_fp16")]; tensor k_cache_59_begin_0 = const()[name = string("k_cache_59_begin_0"), val = tensor([14, 0, 0, 0])]; tensor k_cache_59_end_0 = const()[name = string("k_cache_59_end_0"), val = tensor([15, 1, 1500, 1024])]; tensor k_cache_59_end_mask_0 = const()[name = string("k_cache_59_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_59_squeeze_mask_0 = const()[name = string("k_cache_59_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_59_cast_fp16 = slice_by_index(begin = k_cache_59_begin_0, end = k_cache_59_end_0, end_mask = k_cache_59_end_mask_0, squeeze_mask = k_cache_59_squeeze_mask_0, x = read_state_2)[name = string("k_cache_59_cast_fp16")]; tensor v_cache_59_begin_0 = const()[name = string("v_cache_59_begin_0"), val = tensor([14, 0, 0, 0])]; tensor v_cache_59_end_0 = const()[name = string("v_cache_59_end_0"), val = tensor([15, 1, 1500, 1024])]; tensor v_cache_59_end_mask_0 = const()[name = string("v_cache_59_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_59_squeeze_mask_0 = const()[name = string("v_cache_59_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_59_cast_fp16 = slice_by_index(begin = v_cache_59_begin_0, end = v_cache_59_end_0, end_mask = v_cache_59_end_mask_0, squeeze_mask = v_cache_59_squeeze_mask_0, x = read_state_3)[name = string("v_cache_59_cast_fp16")]; int32 var_3102 = const()[name = string("op_3102"), val = int32(-1)]; tensor var_3120_axes_0 = const()[name = string("op_3120_axes_0"), val = tensor([-1])]; tensor blocks_14_attn_ln_weight_to_fp16 = const()[name = string("blocks_14_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(522132224)))]; tensor blocks_14_attn_ln_bias_to_fp16 = const()[name = string("blocks_14_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(522134336)))]; fp16 var_3108_to_fp16 = const()[name = string("op_3108_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_3120_cast_fp16 = layer_norm(axes = var_3120_axes_0, beta = blocks_14_attn_ln_bias_to_fp16, epsilon = var_3108_to_fp16, gamma = blocks_14_attn_ln_weight_to_fp16, x = x_255_cast_fp16)[name = string("op_3120_cast_fp16")]; tensor var_3131_to_fp16 = const()[name = string("op_3131_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(522136448)))]; tensor var_3132_to_fp16 = const()[name = string("op_3132_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524233664)))]; tensor linear_112_cast_fp16 = linear(bias = var_3132_to_fp16, weight = var_3131_to_fp16, x = var_3120_cast_fp16)[name = string("linear_112_cast_fp16")]; tensor var_3135_to_fp16 = const()[name = string("op_3135_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524235776)))]; tensor linear_113_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3135_to_fp16, x = var_3120_cast_fp16)[name = string("linear_113_cast_fp16")]; tensor var_3139_to_fp16 = const()[name = string("op_3139_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(526332992)))]; tensor var_3140_to_fp16 = const()[name = string("op_3140_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528430208)))]; tensor linear_114_cast_fp16 = linear(bias = var_3140_to_fp16, weight = var_3139_to_fp16, x = var_3120_cast_fp16)[name = string("linear_114_cast_fp16")]; tensor var_3142_shape_cast_fp16 = shape(x = linear_112_cast_fp16)[name = string("op_3142_shape_cast_fp16")]; int32 gather_170_axis_0 = const()[name = string("gather_170_axis_0"), val = int32(0)]; int32 gather_170_batch_dims_0 = const()[name = string("gather_170_batch_dims_0"), val = int32(0)]; bool gather_170_validate_indices_0 = const()[name = string("gather_170_validate_indices_0"), val = bool(false)]; string var_3142_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3142_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_170_to_uint16 = const()[name = string("select_170_to_uint16"), val = uint16(1)]; tensor var_3142_shape_cast_fp16_to_uint16 = cast(dtype = var_3142_shape_cast_fp16_to_uint16_dtype_0, x = var_3142_shape_cast_fp16)[name = string("cast_266")]; uint16 gather_170_cast_uint16 = gather(axis = gather_170_axis_0, batch_dims = gather_170_batch_dims_0, indices = select_170_to_uint16, validate_indices = gather_170_validate_indices_0, x = var_3142_shape_cast_fp16_to_uint16)[name = string("gather_170_cast_uint16")]; string gather_170_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_170_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_170_cast_uint16_to_int32 = cast(dtype = gather_170_cast_uint16_to_int32_dtype_0, x = gather_170_cast_uint16)[name = string("cast_265")]; int32 end_step_31 = add(x = offset, y = gather_170_cast_uint16_to_int32)[name = string("end_step_31")]; tensor expand_dims_224 = const()[name = string("expand_dims_224"), val = tensor([0])]; tensor expand_dims_226 = const()[name = string("expand_dims_226"), val = tensor([0])]; tensor expand_dims_227_axes_0 = const()[name = string("expand_dims_227_axes_0"), val = tensor([0])]; tensor expand_dims_227 = expand_dims(axes = expand_dims_227_axes_0, x = end_step_31)[name = string("expand_dims_227")]; tensor concat_312_values0_0 = const()[name = string("concat_312_values0_0"), val = tensor([14])]; int32 concat_312_axis_0 = const()[name = string("concat_312_axis_0"), val = int32(0)]; bool concat_312_interleave_0 = const()[name = string("concat_312_interleave_0"), val = bool(false)]; tensor concat_312 = concat(axis = concat_312_axis_0, interleave = concat_312_interleave_0, values = (concat_312_values0_0, expand_dims_224, expand_dims_1, expand_dims_226))[name = string("concat_312")]; tensor concat_313_values0_0 = const()[name = string("concat_313_values0_0"), val = tensor([0])]; tensor concat_313_values1_0 = const()[name = string("concat_313_values1_0"), val = tensor([0])]; tensor concat_313_values3_0 = const()[name = string("concat_313_values3_0"), val = tensor([0])]; int32 concat_313_axis_0 = const()[name = string("concat_313_axis_0"), val = int32(0)]; bool concat_313_interleave_0 = const()[name = string("concat_313_interleave_0"), val = bool(false)]; tensor concat_313 = concat(axis = concat_313_axis_0, interleave = concat_313_interleave_0, values = (concat_313_values0_0, concat_313_values1_0, expand_dims_227, concat_313_values3_0))[name = string("concat_313")]; tensor k_cache1_internal_tensor_assign_15_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_15_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_15_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_312, begin_mask = k_cache1_internal_tensor_assign_15_begin_mask_0, end = concat_313, end_mask = k_cache1_internal_tensor_assign_15_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_15_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_15_stride_0, update = linear_113_cast_fp16, x = coreml_update_state_74)[name = string("k_cache1_internal_tensor_assign_15_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_15_cast_fp16, input = k_cache1)[name = string("coreml_update_state_76_write_state")]; tensor coreml_update_state_76 = read_state(input = k_cache1)[name = string("coreml_update_state_76")]; tensor v_cache1_internal_tensor_assign_15_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_15_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_15_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_312, begin_mask = v_cache1_internal_tensor_assign_15_begin_mask_0, end = concat_313, end_mask = v_cache1_internal_tensor_assign_15_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_15_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_15_stride_0, update = linear_114_cast_fp16, x = coreml_update_state_75)[name = string("v_cache1_internal_tensor_assign_15_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_15_cast_fp16, input = v_cache1)[name = string("coreml_update_state_77_write_state")]; tensor coreml_update_state_77 = read_state(input = v_cache1)[name = string("coreml_update_state_77")]; int32 concat_318_values0_0 = const()[name = string("concat_318_values0_0"), val = int32(1)]; int32 concat_318_values2_0 = const()[name = string("concat_318_values2_0"), val = int32(1024)]; int32 concat_318_axis_0 = const()[name = string("concat_318_axis_0"), val = int32(0)]; bool concat_318_interleave_0 = const()[name = string("concat_318_interleave_0"), val = bool(false)]; tensor concat_318 = concat(axis = concat_318_axis_0, interleave = concat_318_interleave_0, values = (concat_318_values0_0, end_step_31, concat_318_values2_0))[name = string("concat_318")]; tensor var_3158_begin_0 = const()[name = string("op_3158_begin_0"), val = tensor([0, 0, 0])]; tensor var_3158_end_mask_0 = const()[name = string("op_3158_end_mask_0"), val = tensor([true, false, true])]; tensor var_3158_cast_fp16 = slice_by_index(begin = var_3158_begin_0, end = concat_318, end_mask = var_3158_end_mask_0, x = k_cache_57_cast_fp16)[name = string("op_3158_cast_fp16")]; tensor var_3161_begin_0 = const()[name = string("op_3161_begin_0"), val = tensor([0, 0, 0])]; tensor var_3161_end_mask_0 = const()[name = string("op_3161_end_mask_0"), val = tensor([true, false, true])]; tensor var_3161_cast_fp16 = slice_by_index(begin = var_3161_begin_0, end = concat_318, end_mask = var_3161_end_mask_0, x = v_cache_57_cast_fp16)[name = string("op_3161_cast_fp16")]; tensor concat_320x = const()[name = string("concat_320x"), val = tensor([1, -1, 16, 64])]; tensor var_3171_cast_fp16 = reshape(shape = concat_320x, x = linear_112_cast_fp16)[name = string("op_3171_cast_fp16")]; tensor const_176_to_fp16 = const()[name = string("const_176_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_115_cast_fp16 = mul(x = var_3171_cast_fp16, y = const_176_to_fp16)[name = string("q_115_cast_fp16")]; tensor concat_321x = const()[name = string("concat_321x"), val = tensor([1, -1, 16, 64])]; tensor var_3178_cast_fp16 = reshape(shape = concat_321x, x = var_3158_cast_fp16)[name = string("op_3178_cast_fp16")]; tensor const_177_to_fp16 = const()[name = string("const_177_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_145_cast_fp16 = mul(x = var_3178_cast_fp16, y = const_177_to_fp16)[name = string("k_145_cast_fp16")]; tensor concat_322x = const()[name = string("concat_322x"), val = tensor([1, -1, 16, 64])]; tensor var_3185_cast_fp16 = reshape(shape = concat_322x, x = var_3161_cast_fp16)[name = string("op_3185_cast_fp16")]; tensor var_3186 = const()[name = string("op_3186"), val = tensor([0, 2, 1, 3])]; bool qk_85_transpose_x_0 = const()[name = string("qk_85_transpose_x_0"), val = bool(false)]; bool qk_85_transpose_y_0 = const()[name = string("qk_85_transpose_y_0"), val = bool(false)]; tensor transpose_249_perm_0 = const()[name = string("transpose_249_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_250_perm_0 = const()[name = string("transpose_250_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_250 = transpose(perm = transpose_250_perm_0, x = k_145_cast_fp16)[name = string("transpose_366")]; tensor transpose_249 = transpose(perm = transpose_249_perm_0, x = q_115_cast_fp16)[name = string("transpose_367")]; tensor qk_85_cast_fp16 = matmul(transpose_x = qk_85_transpose_x_0, transpose_y = qk_85_transpose_y_0, x = transpose_249, y = transpose_250)[name = string("qk_85_cast_fp16")]; int32 concat_323_values1_0 = const()[name = string("concat_323_values1_0"), val = int32(448)]; int32 concat_323_axis_0 = const()[name = string("concat_323_axis_0"), val = int32(0)]; bool concat_323_interleave_0 = const()[name = string("concat_323_interleave_0"), val = bool(false)]; tensor concat_323 = concat(axis = concat_323_axis_0, interleave = concat_323_interleave_0, values = (gather_170_cast_uint16_to_int32, concat_323_values1_0))[name = string("concat_323")]; tensor var_3189_begin_0 = const()[name = string("op_3189_begin_0"), val = tensor([0, 0])]; tensor var_3189_end_mask_0 = const()[name = string("op_3189_end_mask_0"), val = tensor([false, true])]; tensor var_3189_cast_fp16 = slice_by_index(begin = var_3189_begin_0, end = concat_323, end_mask = var_3189_end_mask_0, x = mask_to_fp16)[name = string("op_3189_cast_fp16")]; int32 concat_324_values0_0 = const()[name = string("concat_324_values0_0"), val = int32(0)]; int32 concat_324_axis_0 = const()[name = string("concat_324_axis_0"), val = int32(0)]; bool concat_324_interleave_0 = const()[name = string("concat_324_interleave_0"), val = bool(false)]; tensor concat_324 = concat(axis = concat_324_axis_0, interleave = concat_324_interleave_0, values = (concat_324_values0_0, gather_170_cast_uint16_to_int32))[name = string("concat_324")]; tensor var_3190_begin_0 = const()[name = string("op_3190_begin_0"), val = tensor([0, 0])]; tensor var_3190_end_mask_0 = const()[name = string("op_3190_end_mask_0"), val = tensor([true, false])]; tensor var_3190_cast_fp16 = slice_by_index(begin = var_3190_begin_0, end = concat_324, end_mask = var_3190_end_mask_0, x = var_3189_cast_fp16)[name = string("op_3190_cast_fp16")]; tensor qk_87_cast_fp16 = add(x = qk_85_cast_fp16, y = var_3190_cast_fp16)[name = string("qk_87_cast_fp16")]; tensor var_3193_cast_fp16 = softmax(axis = var_3102, x = qk_87_cast_fp16)[name = string("op_3193_cast_fp16")]; bool var_3195_transpose_x_0 = const()[name = string("op_3195_transpose_x_0"), val = bool(false)]; bool var_3195_transpose_y_0 = const()[name = string("op_3195_transpose_y_0"), val = bool(false)]; tensor v_145_cast_fp16 = transpose(perm = var_3186, x = var_3185_cast_fp16)[name = string("transpose_368")]; tensor var_3195_cast_fp16 = matmul(transpose_x = var_3195_transpose_x_0, transpose_y = var_3195_transpose_y_0, x = var_3193_cast_fp16, y = v_145_cast_fp16)[name = string("op_3195_cast_fp16")]; tensor var_3196 = const()[name = string("op_3196"), val = tensor([0, 2, 1, 3])]; tensor concat_325x = const()[name = string("concat_325x"), val = tensor([1, -1, 1024])]; tensor var_3197_cast_fp16 = transpose(perm = var_3196, x = var_3195_cast_fp16)[name = string("transpose_365")]; tensor x_259_cast_fp16 = reshape(shape = concat_325x, x = var_3197_cast_fp16)[name = string("x_259_cast_fp16")]; tensor var_3201_to_fp16 = const()[name = string("op_3201_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528432320)))]; tensor var_3202_to_fp16 = const()[name = string("op_3202_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(530529536)))]; tensor linear_115_cast_fp16 = linear(bias = var_3202_to_fp16, weight = var_3201_to_fp16, x = x_259_cast_fp16)[name = string("linear_115_cast_fp16")]; tensor x_261_cast_fp16 = add(x = x_255_cast_fp16, y = linear_115_cast_fp16)[name = string("x_261_cast_fp16")]; tensor var_3209_axes_0 = const()[name = string("op_3209_axes_0"), val = tensor([-1])]; tensor blocks_14_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_14_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(530531648)))]; tensor blocks_14_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_14_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(530533760)))]; tensor var_3209_cast_fp16 = layer_norm(axes = var_3209_axes_0, beta = blocks_14_cross_attn_ln_bias_to_fp16, epsilon = var_3108_to_fp16, gamma = blocks_14_cross_attn_ln_weight_to_fp16, x = x_261_cast_fp16)[name = string("op_3209_cast_fp16")]; tensor var_3218_to_fp16 = const()[name = string("op_3218_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(530535872)))]; tensor var_3219_to_fp16 = const()[name = string("op_3219_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(532633088)))]; tensor linear_116_cast_fp16 = linear(bias = var_3219_to_fp16, weight = var_3218_to_fp16, x = var_3209_cast_fp16)[name = string("linear_116_cast_fp16")]; tensor concat_326 = const()[name = string("concat_326"), val = tensor([0, 0, 0])]; tensor concat_327 = const()[name = string("concat_327"), val = tensor([0, 1500, 0])]; tensor k_147_internal_tensor_assign_1_stride_0 = const()[name = string("k_147_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_147_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_147_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_147_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_147_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_147_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_147_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_147_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_326, begin_mask = k_147_internal_tensor_assign_1_begin_mask_0, end = concat_327, end_mask = k_147_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_147_internal_tensor_assign_1_squeeze_mask_0, stride = k_147_internal_tensor_assign_1_stride_0, update = k_cache_59_cast_fp16, x = k_7_to_fp16)[name = string("k_147_internal_tensor_assign_1_cast_fp16")]; tensor concat_328 = const()[name = string("concat_328"), val = tensor([0, 0, 0])]; tensor concat_329 = const()[name = string("concat_329"), val = tensor([0, 1500, 0])]; tensor v_147_internal_tensor_assign_1_stride_0 = const()[name = string("v_147_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_147_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_147_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_147_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_147_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_147_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_147_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_147_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_328, begin_mask = v_147_internal_tensor_assign_1_begin_mask_0, end = concat_329, end_mask = v_147_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_147_internal_tensor_assign_1_squeeze_mask_0, stride = v_147_internal_tensor_assign_1_stride_0, update = v_cache_59_cast_fp16, x = k_7_to_fp16)[name = string("v_147_internal_tensor_assign_1_cast_fp16")]; tensor concat_330x = const()[name = string("concat_330x"), val = tensor([1, -1, 16, 64])]; tensor var_3239_cast_fp16 = reshape(shape = concat_330x, x = linear_116_cast_fp16)[name = string("op_3239_cast_fp16")]; tensor const_178_to_fp16 = const()[name = string("const_178_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_119_cast_fp16 = mul(x = var_3239_cast_fp16, y = const_178_to_fp16)[name = string("q_119_cast_fp16")]; tensor var_3245 = const()[name = string("op_3245"), val = tensor([1, 1500, 16, -1])]; tensor var_3246_cast_fp16 = reshape(shape = var_3245, x = k_147_internal_tensor_assign_1_cast_fp16)[name = string("op_3246_cast_fp16")]; tensor const_179_to_fp16 = const()[name = string("const_179_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_149_cast_fp16 = mul(x = var_3246_cast_fp16, y = const_179_to_fp16)[name = string("k_149_cast_fp16")]; tensor var_3252 = const()[name = string("op_3252"), val = tensor([1, 1500, 16, -1])]; tensor var_3253_cast_fp16 = reshape(shape = var_3252, x = v_147_internal_tensor_assign_1_cast_fp16)[name = string("op_3253_cast_fp16")]; tensor var_3254 = const()[name = string("op_3254"), val = tensor([0, 2, 1, 3])]; bool qk_89_transpose_x_0 = const()[name = string("qk_89_transpose_x_0"), val = bool(false)]; bool qk_89_transpose_y_0 = const()[name = string("qk_89_transpose_y_0"), val = bool(false)]; tensor transpose_251_perm_0 = const()[name = string("transpose_251_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_252_perm_0 = const()[name = string("transpose_252_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_252 = transpose(perm = transpose_252_perm_0, x = k_149_cast_fp16)[name = string("transpose_362")]; tensor transpose_251 = transpose(perm = transpose_251_perm_0, x = q_119_cast_fp16)[name = string("transpose_363")]; tensor qk_89_cast_fp16 = matmul(transpose_x = qk_89_transpose_x_0, transpose_y = qk_89_transpose_y_0, x = transpose_251, y = transpose_252)[name = string("qk_89_cast_fp16")]; tensor var_3258_cast_fp16 = softmax(axis = var_3102, x = qk_89_cast_fp16)[name = string("op_3258_cast_fp16")]; bool var_3260_transpose_x_0 = const()[name = string("op_3260_transpose_x_0"), val = bool(false)]; bool var_3260_transpose_y_0 = const()[name = string("op_3260_transpose_y_0"), val = bool(false)]; tensor v_149_cast_fp16 = transpose(perm = var_3254, x = var_3253_cast_fp16)[name = string("transpose_364")]; tensor var_3260_cast_fp16 = matmul(transpose_x = var_3260_transpose_x_0, transpose_y = var_3260_transpose_y_0, x = var_3258_cast_fp16, y = v_149_cast_fp16)[name = string("op_3260_cast_fp16")]; tensor var_3261 = const()[name = string("op_3261"), val = tensor([0, 2, 1, 3])]; tensor concat_331x = const()[name = string("concat_331x"), val = tensor([1, -1, 1024])]; tensor var_3262_cast_fp16 = transpose(perm = var_3261, x = var_3260_cast_fp16)[name = string("transpose_361")]; tensor x_265_cast_fp16 = reshape(shape = concat_331x, x = var_3262_cast_fp16)[name = string("x_265_cast_fp16")]; tensor var_3266_to_fp16 = const()[name = string("op_3266_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(532635200)))]; tensor var_3267_to_fp16 = const()[name = string("op_3267_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(534732416)))]; tensor linear_117_cast_fp16 = linear(bias = var_3267_to_fp16, weight = var_3266_to_fp16, x = x_265_cast_fp16)[name = string("linear_117_cast_fp16")]; tensor x_267_cast_fp16 = add(x = x_261_cast_fp16, y = linear_117_cast_fp16)[name = string("x_267_cast_fp16")]; tensor var_3274_axes_0 = const()[name = string("op_3274_axes_0"), val = tensor([-1])]; tensor blocks_14_mlp_ln_weight_to_fp16 = const()[name = string("blocks_14_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(534734528)))]; tensor blocks_14_mlp_ln_bias_to_fp16 = const()[name = string("blocks_14_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(534736640)))]; tensor var_3274_cast_fp16 = layer_norm(axes = var_3274_axes_0, beta = blocks_14_mlp_ln_bias_to_fp16, epsilon = var_3108_to_fp16, gamma = blocks_14_mlp_ln_weight_to_fp16, x = x_267_cast_fp16)[name = string("op_3274_cast_fp16")]; tensor var_3283_to_fp16 = const()[name = string("op_3283_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(534738752)))]; tensor var_3284_to_fp16 = const()[name = string("op_3284_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543127424)))]; tensor linear_118_cast_fp16 = linear(bias = var_3284_to_fp16, weight = var_3283_to_fp16, x = var_3274_cast_fp16)[name = string("linear_118_cast_fp16")]; string x_271_mode_0 = const()[name = string("x_271_mode_0"), val = string("EXACT")]; tensor x_271_cast_fp16 = gelu(mode = x_271_mode_0, x = linear_118_cast_fp16)[name = string("x_271_cast_fp16")]; tensor var_3289_to_fp16 = const()[name = string("op_3289_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543135680)))]; tensor var_3290_to_fp16 = const()[name = string("op_3290_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551524352)))]; tensor linear_119_cast_fp16 = linear(bias = var_3290_to_fp16, weight = var_3289_to_fp16, x = x_271_cast_fp16)[name = string("linear_119_cast_fp16")]; tensor x_273_cast_fp16 = add(x = x_267_cast_fp16, y = linear_119_cast_fp16)[name = string("x_273_cast_fp16")]; tensor k_cache_61_begin_0 = const()[name = string("k_cache_61_begin_0"), val = tensor([15, 0, 0, 0])]; tensor k_cache_61_end_0 = const()[name = string("k_cache_61_end_0"), val = tensor([16, 1, 448, 1024])]; tensor k_cache_61_end_mask_0 = const()[name = string("k_cache_61_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_61_squeeze_mask_0 = const()[name = string("k_cache_61_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_61_cast_fp16 = slice_by_index(begin = k_cache_61_begin_0, end = k_cache_61_end_0, end_mask = k_cache_61_end_mask_0, squeeze_mask = k_cache_61_squeeze_mask_0, x = coreml_update_state_76)[name = string("k_cache_61_cast_fp16")]; tensor v_cache_61_begin_0 = const()[name = string("v_cache_61_begin_0"), val = tensor([15, 0, 0, 0])]; tensor v_cache_61_end_0 = const()[name = string("v_cache_61_end_0"), val = tensor([16, 1, 448, 1024])]; tensor v_cache_61_end_mask_0 = const()[name = string("v_cache_61_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_61_squeeze_mask_0 = const()[name = string("v_cache_61_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_61_cast_fp16 = slice_by_index(begin = v_cache_61_begin_0, end = v_cache_61_end_0, end_mask = v_cache_61_end_mask_0, squeeze_mask = v_cache_61_squeeze_mask_0, x = coreml_update_state_77)[name = string("v_cache_61_cast_fp16")]; tensor k_cache_63_begin_0 = const()[name = string("k_cache_63_begin_0"), val = tensor([15, 0, 0, 0])]; tensor k_cache_63_end_0 = const()[name = string("k_cache_63_end_0"), val = tensor([16, 1, 1500, 1024])]; tensor k_cache_63_end_mask_0 = const()[name = string("k_cache_63_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_63_squeeze_mask_0 = const()[name = string("k_cache_63_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_63_cast_fp16 = slice_by_index(begin = k_cache_63_begin_0, end = k_cache_63_end_0, end_mask = k_cache_63_end_mask_0, squeeze_mask = k_cache_63_squeeze_mask_0, x = read_state_2)[name = string("k_cache_63_cast_fp16")]; tensor v_cache_63_begin_0 = const()[name = string("v_cache_63_begin_0"), val = tensor([15, 0, 0, 0])]; tensor v_cache_63_end_0 = const()[name = string("v_cache_63_end_0"), val = tensor([16, 1, 1500, 1024])]; tensor v_cache_63_end_mask_0 = const()[name = string("v_cache_63_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_63_squeeze_mask_0 = const()[name = string("v_cache_63_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_63_cast_fp16 = slice_by_index(begin = v_cache_63_begin_0, end = v_cache_63_end_0, end_mask = v_cache_63_end_mask_0, squeeze_mask = v_cache_63_squeeze_mask_0, x = read_state_3)[name = string("v_cache_63_cast_fp16")]; int32 var_3313 = const()[name = string("op_3313"), val = int32(-1)]; tensor var_3331_axes_0 = const()[name = string("op_3331_axes_0"), val = tensor([-1])]; tensor blocks_15_attn_ln_weight_to_fp16 = const()[name = string("blocks_15_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551526464)))]; tensor blocks_15_attn_ln_bias_to_fp16 = const()[name = string("blocks_15_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551528576)))]; fp16 var_3319_to_fp16 = const()[name = string("op_3319_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_3331_cast_fp16 = layer_norm(axes = var_3331_axes_0, beta = blocks_15_attn_ln_bias_to_fp16, epsilon = var_3319_to_fp16, gamma = blocks_15_attn_ln_weight_to_fp16, x = x_273_cast_fp16)[name = string("op_3331_cast_fp16")]; tensor var_3342_to_fp16 = const()[name = string("op_3342_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551530688)))]; tensor var_3343_to_fp16 = const()[name = string("op_3343_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(553627904)))]; tensor linear_120_cast_fp16 = linear(bias = var_3343_to_fp16, weight = var_3342_to_fp16, x = var_3331_cast_fp16)[name = string("linear_120_cast_fp16")]; tensor var_3346_to_fp16 = const()[name = string("op_3346_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(553630016)))]; tensor linear_121_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3346_to_fp16, x = var_3331_cast_fp16)[name = string("linear_121_cast_fp16")]; tensor var_3350_to_fp16 = const()[name = string("op_3350_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(555727232)))]; tensor var_3351_to_fp16 = const()[name = string("op_3351_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(557824448)))]; tensor linear_122_cast_fp16 = linear(bias = var_3351_to_fp16, weight = var_3350_to_fp16, x = var_3331_cast_fp16)[name = string("linear_122_cast_fp16")]; tensor var_3353_shape_cast_fp16 = shape(x = linear_120_cast_fp16)[name = string("op_3353_shape_cast_fp16")]; int32 gather_182_axis_0 = const()[name = string("gather_182_axis_0"), val = int32(0)]; int32 gather_182_batch_dims_0 = const()[name = string("gather_182_batch_dims_0"), val = int32(0)]; bool gather_182_validate_indices_0 = const()[name = string("gather_182_validate_indices_0"), val = bool(false)]; string var_3353_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3353_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_182_to_uint16 = const()[name = string("select_182_to_uint16"), val = uint16(1)]; tensor var_3353_shape_cast_fp16_to_uint16 = cast(dtype = var_3353_shape_cast_fp16_to_uint16_dtype_0, x = var_3353_shape_cast_fp16)[name = string("cast_264")]; uint16 gather_182_cast_uint16 = gather(axis = gather_182_axis_0, batch_dims = gather_182_batch_dims_0, indices = select_182_to_uint16, validate_indices = gather_182_validate_indices_0, x = var_3353_shape_cast_fp16_to_uint16)[name = string("gather_182_cast_uint16")]; string gather_182_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_182_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_182_cast_uint16_to_int32 = cast(dtype = gather_182_cast_uint16_to_int32_dtype_0, x = gather_182_cast_uint16)[name = string("cast_263")]; int32 end_step_33 = add(x = offset, y = gather_182_cast_uint16_to_int32)[name = string("end_step_33")]; tensor expand_dims_240 = const()[name = string("expand_dims_240"), val = tensor([0])]; tensor expand_dims_242 = const()[name = string("expand_dims_242"), val = tensor([0])]; tensor expand_dims_243_axes_0 = const()[name = string("expand_dims_243_axes_0"), val = tensor([0])]; tensor expand_dims_243 = expand_dims(axes = expand_dims_243_axes_0, x = end_step_33)[name = string("expand_dims_243")]; tensor concat_334_values0_0 = const()[name = string("concat_334_values0_0"), val = tensor([15])]; int32 concat_334_axis_0 = const()[name = string("concat_334_axis_0"), val = int32(0)]; bool concat_334_interleave_0 = const()[name = string("concat_334_interleave_0"), val = bool(false)]; tensor concat_334 = concat(axis = concat_334_axis_0, interleave = concat_334_interleave_0, values = (concat_334_values0_0, expand_dims_240, expand_dims_1, expand_dims_242))[name = string("concat_334")]; tensor concat_335_values0_0 = const()[name = string("concat_335_values0_0"), val = tensor([0])]; tensor concat_335_values1_0 = const()[name = string("concat_335_values1_0"), val = tensor([0])]; tensor concat_335_values3_0 = const()[name = string("concat_335_values3_0"), val = tensor([0])]; int32 concat_335_axis_0 = const()[name = string("concat_335_axis_0"), val = int32(0)]; bool concat_335_interleave_0 = const()[name = string("concat_335_interleave_0"), val = bool(false)]; tensor concat_335 = concat(axis = concat_335_axis_0, interleave = concat_335_interleave_0, values = (concat_335_values0_0, concat_335_values1_0, expand_dims_243, concat_335_values3_0))[name = string("concat_335")]; tensor k_cache1_internal_tensor_assign_16_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_16_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_16_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_334, begin_mask = k_cache1_internal_tensor_assign_16_begin_mask_0, end = concat_335, end_mask = k_cache1_internal_tensor_assign_16_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_16_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_16_stride_0, update = linear_121_cast_fp16, x = coreml_update_state_76)[name = string("k_cache1_internal_tensor_assign_16_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_16_cast_fp16, input = k_cache1)[name = string("coreml_update_state_78_write_state")]; tensor coreml_update_state_78 = read_state(input = k_cache1)[name = string("coreml_update_state_78")]; tensor v_cache1_internal_tensor_assign_16_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_16_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_16_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_334, begin_mask = v_cache1_internal_tensor_assign_16_begin_mask_0, end = concat_335, end_mask = v_cache1_internal_tensor_assign_16_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_16_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_16_stride_0, update = linear_122_cast_fp16, x = coreml_update_state_77)[name = string("v_cache1_internal_tensor_assign_16_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_16_cast_fp16, input = v_cache1)[name = string("coreml_update_state_79_write_state")]; tensor coreml_update_state_79 = read_state(input = v_cache1)[name = string("coreml_update_state_79")]; int32 concat_340_values0_0 = const()[name = string("concat_340_values0_0"), val = int32(1)]; int32 concat_340_values2_0 = const()[name = string("concat_340_values2_0"), val = int32(1024)]; int32 concat_340_axis_0 = const()[name = string("concat_340_axis_0"), val = int32(0)]; bool concat_340_interleave_0 = const()[name = string("concat_340_interleave_0"), val = bool(false)]; tensor concat_340 = concat(axis = concat_340_axis_0, interleave = concat_340_interleave_0, values = (concat_340_values0_0, end_step_33, concat_340_values2_0))[name = string("concat_340")]; tensor var_3369_begin_0 = const()[name = string("op_3369_begin_0"), val = tensor([0, 0, 0])]; tensor var_3369_end_mask_0 = const()[name = string("op_3369_end_mask_0"), val = tensor([true, false, true])]; tensor var_3369_cast_fp16 = slice_by_index(begin = var_3369_begin_0, end = concat_340, end_mask = var_3369_end_mask_0, x = k_cache_61_cast_fp16)[name = string("op_3369_cast_fp16")]; tensor var_3372_begin_0 = const()[name = string("op_3372_begin_0"), val = tensor([0, 0, 0])]; tensor var_3372_end_mask_0 = const()[name = string("op_3372_end_mask_0"), val = tensor([true, false, true])]; tensor var_3372_cast_fp16 = slice_by_index(begin = var_3372_begin_0, end = concat_340, end_mask = var_3372_end_mask_0, x = v_cache_61_cast_fp16)[name = string("op_3372_cast_fp16")]; tensor concat_342x = const()[name = string("concat_342x"), val = tensor([1, -1, 16, 64])]; tensor var_3382_cast_fp16 = reshape(shape = concat_342x, x = linear_120_cast_fp16)[name = string("op_3382_cast_fp16")]; tensor const_180_to_fp16 = const()[name = string("const_180_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_123_cast_fp16 = mul(x = var_3382_cast_fp16, y = const_180_to_fp16)[name = string("q_123_cast_fp16")]; tensor concat_343x = const()[name = string("concat_343x"), val = tensor([1, -1, 16, 64])]; tensor var_3389_cast_fp16 = reshape(shape = concat_343x, x = var_3369_cast_fp16)[name = string("op_3389_cast_fp16")]; tensor const_181_to_fp16 = const()[name = string("const_181_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_155_cast_fp16 = mul(x = var_3389_cast_fp16, y = const_181_to_fp16)[name = string("k_155_cast_fp16")]; tensor concat_344x = const()[name = string("concat_344x"), val = tensor([1, -1, 16, 64])]; tensor var_3396_cast_fp16 = reshape(shape = concat_344x, x = var_3372_cast_fp16)[name = string("op_3396_cast_fp16")]; tensor var_3397 = const()[name = string("op_3397"), val = tensor([0, 2, 1, 3])]; bool qk_91_transpose_x_0 = const()[name = string("qk_91_transpose_x_0"), val = bool(false)]; bool qk_91_transpose_y_0 = const()[name = string("qk_91_transpose_y_0"), val = bool(false)]; tensor transpose_253_perm_0 = const()[name = string("transpose_253_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_254_perm_0 = const()[name = string("transpose_254_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_254 = transpose(perm = transpose_254_perm_0, x = k_155_cast_fp16)[name = string("transpose_358")]; tensor transpose_253 = transpose(perm = transpose_253_perm_0, x = q_123_cast_fp16)[name = string("transpose_359")]; tensor qk_91_cast_fp16 = matmul(transpose_x = qk_91_transpose_x_0, transpose_y = qk_91_transpose_y_0, x = transpose_253, y = transpose_254)[name = string("qk_91_cast_fp16")]; int32 concat_345_values1_0 = const()[name = string("concat_345_values1_0"), val = int32(448)]; int32 concat_345_axis_0 = const()[name = string("concat_345_axis_0"), val = int32(0)]; bool concat_345_interleave_0 = const()[name = string("concat_345_interleave_0"), val = bool(false)]; tensor concat_345 = concat(axis = concat_345_axis_0, interleave = concat_345_interleave_0, values = (gather_182_cast_uint16_to_int32, concat_345_values1_0))[name = string("concat_345")]; tensor var_3400_begin_0 = const()[name = string("op_3400_begin_0"), val = tensor([0, 0])]; tensor var_3400_end_mask_0 = const()[name = string("op_3400_end_mask_0"), val = tensor([false, true])]; tensor var_3400_cast_fp16 = slice_by_index(begin = var_3400_begin_0, end = concat_345, end_mask = var_3400_end_mask_0, x = mask_to_fp16)[name = string("op_3400_cast_fp16")]; int32 concat_346_values0_0 = const()[name = string("concat_346_values0_0"), val = int32(0)]; int32 concat_346_axis_0 = const()[name = string("concat_346_axis_0"), val = int32(0)]; bool concat_346_interleave_0 = const()[name = string("concat_346_interleave_0"), val = bool(false)]; tensor concat_346 = concat(axis = concat_346_axis_0, interleave = concat_346_interleave_0, values = (concat_346_values0_0, gather_182_cast_uint16_to_int32))[name = string("concat_346")]; tensor var_3401_begin_0 = const()[name = string("op_3401_begin_0"), val = tensor([0, 0])]; tensor var_3401_end_mask_0 = const()[name = string("op_3401_end_mask_0"), val = tensor([true, false])]; tensor var_3401_cast_fp16 = slice_by_index(begin = var_3401_begin_0, end = concat_346, end_mask = var_3401_end_mask_0, x = var_3400_cast_fp16)[name = string("op_3401_cast_fp16")]; tensor qk_93_cast_fp16 = add(x = qk_91_cast_fp16, y = var_3401_cast_fp16)[name = string("qk_93_cast_fp16")]; tensor var_3404_cast_fp16 = softmax(axis = var_3313, x = qk_93_cast_fp16)[name = string("op_3404_cast_fp16")]; bool var_3406_transpose_x_0 = const()[name = string("op_3406_transpose_x_0"), val = bool(false)]; bool var_3406_transpose_y_0 = const()[name = string("op_3406_transpose_y_0"), val = bool(false)]; tensor v_155_cast_fp16 = transpose(perm = var_3397, x = var_3396_cast_fp16)[name = string("transpose_360")]; tensor var_3406_cast_fp16 = matmul(transpose_x = var_3406_transpose_x_0, transpose_y = var_3406_transpose_y_0, x = var_3404_cast_fp16, y = v_155_cast_fp16)[name = string("op_3406_cast_fp16")]; tensor var_3407 = const()[name = string("op_3407"), val = tensor([0, 2, 1, 3])]; tensor concat_347x = const()[name = string("concat_347x"), val = tensor([1, -1, 1024])]; tensor var_3408_cast_fp16 = transpose(perm = var_3407, x = var_3406_cast_fp16)[name = string("transpose_357")]; tensor x_277_cast_fp16 = reshape(shape = concat_347x, x = var_3408_cast_fp16)[name = string("x_277_cast_fp16")]; tensor var_3412_to_fp16 = const()[name = string("op_3412_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(557826560)))]; tensor var_3413_to_fp16 = const()[name = string("op_3413_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(559923776)))]; tensor linear_123_cast_fp16 = linear(bias = var_3413_to_fp16, weight = var_3412_to_fp16, x = x_277_cast_fp16)[name = string("linear_123_cast_fp16")]; tensor x_279_cast_fp16 = add(x = x_273_cast_fp16, y = linear_123_cast_fp16)[name = string("x_279_cast_fp16")]; tensor var_3420_axes_0 = const()[name = string("op_3420_axes_0"), val = tensor([-1])]; tensor blocks_15_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_15_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(559925888)))]; tensor blocks_15_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_15_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(559928000)))]; tensor var_3420_cast_fp16 = layer_norm(axes = var_3420_axes_0, beta = blocks_15_cross_attn_ln_bias_to_fp16, epsilon = var_3319_to_fp16, gamma = blocks_15_cross_attn_ln_weight_to_fp16, x = x_279_cast_fp16)[name = string("op_3420_cast_fp16")]; tensor var_3429_to_fp16 = const()[name = string("op_3429_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(559930112)))]; tensor var_3430_to_fp16 = const()[name = string("op_3430_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562027328)))]; tensor linear_124_cast_fp16 = linear(bias = var_3430_to_fp16, weight = var_3429_to_fp16, x = var_3420_cast_fp16)[name = string("linear_124_cast_fp16")]; tensor concat_348 = const()[name = string("concat_348"), val = tensor([0, 0, 0])]; tensor concat_349 = const()[name = string("concat_349"), val = tensor([0, 1500, 0])]; tensor k_157_internal_tensor_assign_1_stride_0 = const()[name = string("k_157_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_157_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_157_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_157_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_157_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_157_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_157_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_157_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_348, begin_mask = k_157_internal_tensor_assign_1_begin_mask_0, end = concat_349, end_mask = k_157_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_157_internal_tensor_assign_1_squeeze_mask_0, stride = k_157_internal_tensor_assign_1_stride_0, update = k_cache_63_cast_fp16, x = k_7_to_fp16)[name = string("k_157_internal_tensor_assign_1_cast_fp16")]; tensor concat_350 = const()[name = string("concat_350"), val = tensor([0, 0, 0])]; tensor concat_351 = const()[name = string("concat_351"), val = tensor([0, 1500, 0])]; tensor v_157_internal_tensor_assign_1_stride_0 = const()[name = string("v_157_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_157_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_157_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_157_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_157_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_157_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_157_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_157_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_350, begin_mask = v_157_internal_tensor_assign_1_begin_mask_0, end = concat_351, end_mask = v_157_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_157_internal_tensor_assign_1_squeeze_mask_0, stride = v_157_internal_tensor_assign_1_stride_0, update = v_cache_63_cast_fp16, x = k_7_to_fp16)[name = string("v_157_internal_tensor_assign_1_cast_fp16")]; tensor concat_352x = const()[name = string("concat_352x"), val = tensor([1, -1, 16, 64])]; tensor var_3450_cast_fp16 = reshape(shape = concat_352x, x = linear_124_cast_fp16)[name = string("op_3450_cast_fp16")]; tensor const_182_to_fp16 = const()[name = string("const_182_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_127_cast_fp16 = mul(x = var_3450_cast_fp16, y = const_182_to_fp16)[name = string("q_127_cast_fp16")]; tensor var_3456 = const()[name = string("op_3456"), val = tensor([1, 1500, 16, -1])]; tensor var_3457_cast_fp16 = reshape(shape = var_3456, x = k_157_internal_tensor_assign_1_cast_fp16)[name = string("op_3457_cast_fp16")]; tensor const_183_to_fp16 = const()[name = string("const_183_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_159_cast_fp16 = mul(x = var_3457_cast_fp16, y = const_183_to_fp16)[name = string("k_159_cast_fp16")]; tensor var_3463 = const()[name = string("op_3463"), val = tensor([1, 1500, 16, -1])]; tensor var_3464_cast_fp16 = reshape(shape = var_3463, x = v_157_internal_tensor_assign_1_cast_fp16)[name = string("op_3464_cast_fp16")]; tensor var_3465 = const()[name = string("op_3465"), val = tensor([0, 2, 1, 3])]; bool qk_95_transpose_x_0 = const()[name = string("qk_95_transpose_x_0"), val = bool(false)]; bool qk_95_transpose_y_0 = const()[name = string("qk_95_transpose_y_0"), val = bool(false)]; tensor transpose_255_perm_0 = const()[name = string("transpose_255_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_256_perm_0 = const()[name = string("transpose_256_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_256 = transpose(perm = transpose_256_perm_0, x = k_159_cast_fp16)[name = string("transpose_354")]; tensor transpose_255 = transpose(perm = transpose_255_perm_0, x = q_127_cast_fp16)[name = string("transpose_355")]; tensor qk_95_cast_fp16 = matmul(transpose_x = qk_95_transpose_x_0, transpose_y = qk_95_transpose_y_0, x = transpose_255, y = transpose_256)[name = string("qk_95_cast_fp16")]; tensor var_3469_cast_fp16 = softmax(axis = var_3313, x = qk_95_cast_fp16)[name = string("op_3469_cast_fp16")]; bool var_3471_transpose_x_0 = const()[name = string("op_3471_transpose_x_0"), val = bool(false)]; bool var_3471_transpose_y_0 = const()[name = string("op_3471_transpose_y_0"), val = bool(false)]; tensor v_159_cast_fp16 = transpose(perm = var_3465, x = var_3464_cast_fp16)[name = string("transpose_356")]; tensor var_3471_cast_fp16 = matmul(transpose_x = var_3471_transpose_x_0, transpose_y = var_3471_transpose_y_0, x = var_3469_cast_fp16, y = v_159_cast_fp16)[name = string("op_3471_cast_fp16")]; tensor var_3472 = const()[name = string("op_3472"), val = tensor([0, 2, 1, 3])]; tensor concat_353x = const()[name = string("concat_353x"), val = tensor([1, -1, 1024])]; tensor var_3473_cast_fp16 = transpose(perm = var_3472, x = var_3471_cast_fp16)[name = string("transpose_353")]; tensor x_283_cast_fp16 = reshape(shape = concat_353x, x = var_3473_cast_fp16)[name = string("x_283_cast_fp16")]; tensor var_3477_to_fp16 = const()[name = string("op_3477_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562029440)))]; tensor var_3478_to_fp16 = const()[name = string("op_3478_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564126656)))]; tensor linear_125_cast_fp16 = linear(bias = var_3478_to_fp16, weight = var_3477_to_fp16, x = x_283_cast_fp16)[name = string("linear_125_cast_fp16")]; tensor x_285_cast_fp16 = add(x = x_279_cast_fp16, y = linear_125_cast_fp16)[name = string("x_285_cast_fp16")]; tensor var_3485_axes_0 = const()[name = string("op_3485_axes_0"), val = tensor([-1])]; tensor blocks_15_mlp_ln_weight_to_fp16 = const()[name = string("blocks_15_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564128768)))]; tensor blocks_15_mlp_ln_bias_to_fp16 = const()[name = string("blocks_15_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564130880)))]; tensor var_3485_cast_fp16 = layer_norm(axes = var_3485_axes_0, beta = blocks_15_mlp_ln_bias_to_fp16, epsilon = var_3319_to_fp16, gamma = blocks_15_mlp_ln_weight_to_fp16, x = x_285_cast_fp16)[name = string("op_3485_cast_fp16")]; tensor var_3494_to_fp16 = const()[name = string("op_3494_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564132992)))]; tensor var_3495_to_fp16 = const()[name = string("op_3495_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(572521664)))]; tensor linear_126_cast_fp16 = linear(bias = var_3495_to_fp16, weight = var_3494_to_fp16, x = var_3485_cast_fp16)[name = string("linear_126_cast_fp16")]; string x_289_mode_0 = const()[name = string("x_289_mode_0"), val = string("EXACT")]; tensor x_289_cast_fp16 = gelu(mode = x_289_mode_0, x = linear_126_cast_fp16)[name = string("x_289_cast_fp16")]; tensor var_3500_to_fp16 = const()[name = string("op_3500_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(572529920)))]; tensor var_3501_to_fp16 = const()[name = string("op_3501_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(580918592)))]; tensor linear_127_cast_fp16 = linear(bias = var_3501_to_fp16, weight = var_3500_to_fp16, x = x_289_cast_fp16)[name = string("linear_127_cast_fp16")]; tensor x_291_cast_fp16 = add(x = x_285_cast_fp16, y = linear_127_cast_fp16)[name = string("x_291_cast_fp16")]; tensor k_cache_65_begin_0 = const()[name = string("k_cache_65_begin_0"), val = tensor([16, 0, 0, 0])]; tensor k_cache_65_end_0 = const()[name = string("k_cache_65_end_0"), val = tensor([17, 1, 448, 1024])]; tensor k_cache_65_end_mask_0 = const()[name = string("k_cache_65_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_65_squeeze_mask_0 = const()[name = string("k_cache_65_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_65_cast_fp16 = slice_by_index(begin = k_cache_65_begin_0, end = k_cache_65_end_0, end_mask = k_cache_65_end_mask_0, squeeze_mask = k_cache_65_squeeze_mask_0, x = coreml_update_state_78)[name = string("k_cache_65_cast_fp16")]; tensor v_cache_65_begin_0 = const()[name = string("v_cache_65_begin_0"), val = tensor([16, 0, 0, 0])]; tensor v_cache_65_end_0 = const()[name = string("v_cache_65_end_0"), val = tensor([17, 1, 448, 1024])]; tensor v_cache_65_end_mask_0 = const()[name = string("v_cache_65_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_65_squeeze_mask_0 = const()[name = string("v_cache_65_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_65_cast_fp16 = slice_by_index(begin = v_cache_65_begin_0, end = v_cache_65_end_0, end_mask = v_cache_65_end_mask_0, squeeze_mask = v_cache_65_squeeze_mask_0, x = coreml_update_state_79)[name = string("v_cache_65_cast_fp16")]; tensor k_cache_67_begin_0 = const()[name = string("k_cache_67_begin_0"), val = tensor([16, 0, 0, 0])]; tensor k_cache_67_end_0 = const()[name = string("k_cache_67_end_0"), val = tensor([17, 1, 1500, 1024])]; tensor k_cache_67_end_mask_0 = const()[name = string("k_cache_67_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_67_squeeze_mask_0 = const()[name = string("k_cache_67_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_67_cast_fp16 = slice_by_index(begin = k_cache_67_begin_0, end = k_cache_67_end_0, end_mask = k_cache_67_end_mask_0, squeeze_mask = k_cache_67_squeeze_mask_0, x = read_state_2)[name = string("k_cache_67_cast_fp16")]; tensor v_cache_67_begin_0 = const()[name = string("v_cache_67_begin_0"), val = tensor([16, 0, 0, 0])]; tensor v_cache_67_end_0 = const()[name = string("v_cache_67_end_0"), val = tensor([17, 1, 1500, 1024])]; tensor v_cache_67_end_mask_0 = const()[name = string("v_cache_67_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_67_squeeze_mask_0 = const()[name = string("v_cache_67_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_67_cast_fp16 = slice_by_index(begin = v_cache_67_begin_0, end = v_cache_67_end_0, end_mask = v_cache_67_end_mask_0, squeeze_mask = v_cache_67_squeeze_mask_0, x = read_state_3)[name = string("v_cache_67_cast_fp16")]; int32 var_3524 = const()[name = string("op_3524"), val = int32(-1)]; tensor var_3542_axes_0 = const()[name = string("op_3542_axes_0"), val = tensor([-1])]; tensor blocks_16_attn_ln_weight_to_fp16 = const()[name = string("blocks_16_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(580920704)))]; tensor blocks_16_attn_ln_bias_to_fp16 = const()[name = string("blocks_16_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(580922816)))]; fp16 var_3530_to_fp16 = const()[name = string("op_3530_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_3542_cast_fp16 = layer_norm(axes = var_3542_axes_0, beta = blocks_16_attn_ln_bias_to_fp16, epsilon = var_3530_to_fp16, gamma = blocks_16_attn_ln_weight_to_fp16, x = x_291_cast_fp16)[name = string("op_3542_cast_fp16")]; tensor var_3553_to_fp16 = const()[name = string("op_3553_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(580924928)))]; tensor var_3554_to_fp16 = const()[name = string("op_3554_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(583022144)))]; tensor linear_128_cast_fp16 = linear(bias = var_3554_to_fp16, weight = var_3553_to_fp16, x = var_3542_cast_fp16)[name = string("linear_128_cast_fp16")]; tensor var_3557_to_fp16 = const()[name = string("op_3557_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(583024256)))]; tensor linear_129_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3557_to_fp16, x = var_3542_cast_fp16)[name = string("linear_129_cast_fp16")]; tensor var_3561_to_fp16 = const()[name = string("op_3561_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(585121472)))]; tensor var_3562_to_fp16 = const()[name = string("op_3562_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(587218688)))]; tensor linear_130_cast_fp16 = linear(bias = var_3562_to_fp16, weight = var_3561_to_fp16, x = var_3542_cast_fp16)[name = string("linear_130_cast_fp16")]; tensor var_3564_shape_cast_fp16 = shape(x = linear_128_cast_fp16)[name = string("op_3564_shape_cast_fp16")]; int32 gather_194_axis_0 = const()[name = string("gather_194_axis_0"), val = int32(0)]; int32 gather_194_batch_dims_0 = const()[name = string("gather_194_batch_dims_0"), val = int32(0)]; bool gather_194_validate_indices_0 = const()[name = string("gather_194_validate_indices_0"), val = bool(false)]; string var_3564_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3564_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_194_to_uint16 = const()[name = string("select_194_to_uint16"), val = uint16(1)]; tensor var_3564_shape_cast_fp16_to_uint16 = cast(dtype = var_3564_shape_cast_fp16_to_uint16_dtype_0, x = var_3564_shape_cast_fp16)[name = string("cast_262")]; uint16 gather_194_cast_uint16 = gather(axis = gather_194_axis_0, batch_dims = gather_194_batch_dims_0, indices = select_194_to_uint16, validate_indices = gather_194_validate_indices_0, x = var_3564_shape_cast_fp16_to_uint16)[name = string("gather_194_cast_uint16")]; string gather_194_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_194_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_194_cast_uint16_to_int32 = cast(dtype = gather_194_cast_uint16_to_int32_dtype_0, x = gather_194_cast_uint16)[name = string("cast_261")]; int32 end_step_35 = add(x = offset, y = gather_194_cast_uint16_to_int32)[name = string("end_step_35")]; tensor expand_dims_256 = const()[name = string("expand_dims_256"), val = tensor([0])]; tensor expand_dims_258 = const()[name = string("expand_dims_258"), val = tensor([0])]; tensor expand_dims_259_axes_0 = const()[name = string("expand_dims_259_axes_0"), val = tensor([0])]; tensor expand_dims_259 = expand_dims(axes = expand_dims_259_axes_0, x = end_step_35)[name = string("expand_dims_259")]; tensor concat_356_values0_0 = const()[name = string("concat_356_values0_0"), val = tensor([16])]; int32 concat_356_axis_0 = const()[name = string("concat_356_axis_0"), val = int32(0)]; bool concat_356_interleave_0 = const()[name = string("concat_356_interleave_0"), val = bool(false)]; tensor concat_356 = concat(axis = concat_356_axis_0, interleave = concat_356_interleave_0, values = (concat_356_values0_0, expand_dims_256, expand_dims_1, expand_dims_258))[name = string("concat_356")]; tensor concat_357_values0_0 = const()[name = string("concat_357_values0_0"), val = tensor([0])]; tensor concat_357_values1_0 = const()[name = string("concat_357_values1_0"), val = tensor([0])]; tensor concat_357_values3_0 = const()[name = string("concat_357_values3_0"), val = tensor([0])]; int32 concat_357_axis_0 = const()[name = string("concat_357_axis_0"), val = int32(0)]; bool concat_357_interleave_0 = const()[name = string("concat_357_interleave_0"), val = bool(false)]; tensor concat_357 = concat(axis = concat_357_axis_0, interleave = concat_357_interleave_0, values = (concat_357_values0_0, concat_357_values1_0, expand_dims_259, concat_357_values3_0))[name = string("concat_357")]; tensor k_cache1_internal_tensor_assign_17_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_17_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_17_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_356, begin_mask = k_cache1_internal_tensor_assign_17_begin_mask_0, end = concat_357, end_mask = k_cache1_internal_tensor_assign_17_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_17_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_17_stride_0, update = linear_129_cast_fp16, x = coreml_update_state_78)[name = string("k_cache1_internal_tensor_assign_17_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_17_cast_fp16, input = k_cache1)[name = string("coreml_update_state_80_write_state")]; tensor coreml_update_state_80 = read_state(input = k_cache1)[name = string("coreml_update_state_80")]; tensor v_cache1_internal_tensor_assign_17_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_17_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_17_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_356, begin_mask = v_cache1_internal_tensor_assign_17_begin_mask_0, end = concat_357, end_mask = v_cache1_internal_tensor_assign_17_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_17_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_17_stride_0, update = linear_130_cast_fp16, x = coreml_update_state_79)[name = string("v_cache1_internal_tensor_assign_17_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_17_cast_fp16, input = v_cache1)[name = string("coreml_update_state_81_write_state")]; tensor coreml_update_state_81 = read_state(input = v_cache1)[name = string("coreml_update_state_81")]; int32 concat_362_values0_0 = const()[name = string("concat_362_values0_0"), val = int32(1)]; int32 concat_362_values2_0 = const()[name = string("concat_362_values2_0"), val = int32(1024)]; int32 concat_362_axis_0 = const()[name = string("concat_362_axis_0"), val = int32(0)]; bool concat_362_interleave_0 = const()[name = string("concat_362_interleave_0"), val = bool(false)]; tensor concat_362 = concat(axis = concat_362_axis_0, interleave = concat_362_interleave_0, values = (concat_362_values0_0, end_step_35, concat_362_values2_0))[name = string("concat_362")]; tensor var_3580_begin_0 = const()[name = string("op_3580_begin_0"), val = tensor([0, 0, 0])]; tensor var_3580_end_mask_0 = const()[name = string("op_3580_end_mask_0"), val = tensor([true, false, true])]; tensor var_3580_cast_fp16 = slice_by_index(begin = var_3580_begin_0, end = concat_362, end_mask = var_3580_end_mask_0, x = k_cache_65_cast_fp16)[name = string("op_3580_cast_fp16")]; tensor var_3583_begin_0 = const()[name = string("op_3583_begin_0"), val = tensor([0, 0, 0])]; tensor var_3583_end_mask_0 = const()[name = string("op_3583_end_mask_0"), val = tensor([true, false, true])]; tensor var_3583_cast_fp16 = slice_by_index(begin = var_3583_begin_0, end = concat_362, end_mask = var_3583_end_mask_0, x = v_cache_65_cast_fp16)[name = string("op_3583_cast_fp16")]; tensor concat_364x = const()[name = string("concat_364x"), val = tensor([1, -1, 16, 64])]; tensor var_3593_cast_fp16 = reshape(shape = concat_364x, x = linear_128_cast_fp16)[name = string("op_3593_cast_fp16")]; tensor const_184_to_fp16 = const()[name = string("const_184_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_131_cast_fp16 = mul(x = var_3593_cast_fp16, y = const_184_to_fp16)[name = string("q_131_cast_fp16")]; tensor concat_365x = const()[name = string("concat_365x"), val = tensor([1, -1, 16, 64])]; tensor var_3600_cast_fp16 = reshape(shape = concat_365x, x = var_3580_cast_fp16)[name = string("op_3600_cast_fp16")]; tensor const_185_to_fp16 = const()[name = string("const_185_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_165_cast_fp16 = mul(x = var_3600_cast_fp16, y = const_185_to_fp16)[name = string("k_165_cast_fp16")]; tensor concat_366x = const()[name = string("concat_366x"), val = tensor([1, -1, 16, 64])]; tensor var_3607_cast_fp16 = reshape(shape = concat_366x, x = var_3583_cast_fp16)[name = string("op_3607_cast_fp16")]; tensor var_3608 = const()[name = string("op_3608"), val = tensor([0, 2, 1, 3])]; bool qk_97_transpose_x_0 = const()[name = string("qk_97_transpose_x_0"), val = bool(false)]; bool qk_97_transpose_y_0 = const()[name = string("qk_97_transpose_y_0"), val = bool(false)]; tensor transpose_257_perm_0 = const()[name = string("transpose_257_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_258_perm_0 = const()[name = string("transpose_258_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_258 = transpose(perm = transpose_258_perm_0, x = k_165_cast_fp16)[name = string("transpose_350")]; tensor transpose_257 = transpose(perm = transpose_257_perm_0, x = q_131_cast_fp16)[name = string("transpose_351")]; tensor qk_97_cast_fp16 = matmul(transpose_x = qk_97_transpose_x_0, transpose_y = qk_97_transpose_y_0, x = transpose_257, y = transpose_258)[name = string("qk_97_cast_fp16")]; int32 concat_367_values1_0 = const()[name = string("concat_367_values1_0"), val = int32(448)]; int32 concat_367_axis_0 = const()[name = string("concat_367_axis_0"), val = int32(0)]; bool concat_367_interleave_0 = const()[name = string("concat_367_interleave_0"), val = bool(false)]; tensor concat_367 = concat(axis = concat_367_axis_0, interleave = concat_367_interleave_0, values = (gather_194_cast_uint16_to_int32, concat_367_values1_0))[name = string("concat_367")]; tensor var_3611_begin_0 = const()[name = string("op_3611_begin_0"), val = tensor([0, 0])]; tensor var_3611_end_mask_0 = const()[name = string("op_3611_end_mask_0"), val = tensor([false, true])]; tensor var_3611_cast_fp16 = slice_by_index(begin = var_3611_begin_0, end = concat_367, end_mask = var_3611_end_mask_0, x = mask_to_fp16)[name = string("op_3611_cast_fp16")]; int32 concat_368_values0_0 = const()[name = string("concat_368_values0_0"), val = int32(0)]; int32 concat_368_axis_0 = const()[name = string("concat_368_axis_0"), val = int32(0)]; bool concat_368_interleave_0 = const()[name = string("concat_368_interleave_0"), val = bool(false)]; tensor concat_368 = concat(axis = concat_368_axis_0, interleave = concat_368_interleave_0, values = (concat_368_values0_0, gather_194_cast_uint16_to_int32))[name = string("concat_368")]; tensor var_3612_begin_0 = const()[name = string("op_3612_begin_0"), val = tensor([0, 0])]; tensor var_3612_end_mask_0 = const()[name = string("op_3612_end_mask_0"), val = tensor([true, false])]; tensor var_3612_cast_fp16 = slice_by_index(begin = var_3612_begin_0, end = concat_368, end_mask = var_3612_end_mask_0, x = var_3611_cast_fp16)[name = string("op_3612_cast_fp16")]; tensor qk_99_cast_fp16 = add(x = qk_97_cast_fp16, y = var_3612_cast_fp16)[name = string("qk_99_cast_fp16")]; tensor var_3615_cast_fp16 = softmax(axis = var_3524, x = qk_99_cast_fp16)[name = string("op_3615_cast_fp16")]; bool var_3617_transpose_x_0 = const()[name = string("op_3617_transpose_x_0"), val = bool(false)]; bool var_3617_transpose_y_0 = const()[name = string("op_3617_transpose_y_0"), val = bool(false)]; tensor v_165_cast_fp16 = transpose(perm = var_3608, x = var_3607_cast_fp16)[name = string("transpose_352")]; tensor var_3617_cast_fp16 = matmul(transpose_x = var_3617_transpose_x_0, transpose_y = var_3617_transpose_y_0, x = var_3615_cast_fp16, y = v_165_cast_fp16)[name = string("op_3617_cast_fp16")]; tensor var_3618 = const()[name = string("op_3618"), val = tensor([0, 2, 1, 3])]; tensor concat_369x = const()[name = string("concat_369x"), val = tensor([1, -1, 1024])]; tensor var_3619_cast_fp16 = transpose(perm = var_3618, x = var_3617_cast_fp16)[name = string("transpose_349")]; tensor x_295_cast_fp16 = reshape(shape = concat_369x, x = var_3619_cast_fp16)[name = string("x_295_cast_fp16")]; tensor var_3623_to_fp16 = const()[name = string("op_3623_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(587220800)))]; tensor var_3624_to_fp16 = const()[name = string("op_3624_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(589318016)))]; tensor linear_131_cast_fp16 = linear(bias = var_3624_to_fp16, weight = var_3623_to_fp16, x = x_295_cast_fp16)[name = string("linear_131_cast_fp16")]; tensor x_297_cast_fp16 = add(x = x_291_cast_fp16, y = linear_131_cast_fp16)[name = string("x_297_cast_fp16")]; tensor var_3631_axes_0 = const()[name = string("op_3631_axes_0"), val = tensor([-1])]; tensor blocks_16_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_16_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(589320128)))]; tensor blocks_16_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_16_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(589322240)))]; tensor var_3631_cast_fp16 = layer_norm(axes = var_3631_axes_0, beta = blocks_16_cross_attn_ln_bias_to_fp16, epsilon = var_3530_to_fp16, gamma = blocks_16_cross_attn_ln_weight_to_fp16, x = x_297_cast_fp16)[name = string("op_3631_cast_fp16")]; tensor var_3640_to_fp16 = const()[name = string("op_3640_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(589324352)))]; tensor var_3641_to_fp16 = const()[name = string("op_3641_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(591421568)))]; tensor linear_132_cast_fp16 = linear(bias = var_3641_to_fp16, weight = var_3640_to_fp16, x = var_3631_cast_fp16)[name = string("linear_132_cast_fp16")]; tensor concat_370 = const()[name = string("concat_370"), val = tensor([0, 0, 0])]; tensor concat_371 = const()[name = string("concat_371"), val = tensor([0, 1500, 0])]; tensor k_167_internal_tensor_assign_1_stride_0 = const()[name = string("k_167_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_167_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_167_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_167_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_167_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_167_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_167_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_167_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_370, begin_mask = k_167_internal_tensor_assign_1_begin_mask_0, end = concat_371, end_mask = k_167_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_167_internal_tensor_assign_1_squeeze_mask_0, stride = k_167_internal_tensor_assign_1_stride_0, update = k_cache_67_cast_fp16, x = k_7_to_fp16)[name = string("k_167_internal_tensor_assign_1_cast_fp16")]; tensor concat_372 = const()[name = string("concat_372"), val = tensor([0, 0, 0])]; tensor concat_373 = const()[name = string("concat_373"), val = tensor([0, 1500, 0])]; tensor v_167_internal_tensor_assign_1_stride_0 = const()[name = string("v_167_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_167_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_167_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_167_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_167_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_167_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_167_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_167_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_372, begin_mask = v_167_internal_tensor_assign_1_begin_mask_0, end = concat_373, end_mask = v_167_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_167_internal_tensor_assign_1_squeeze_mask_0, stride = v_167_internal_tensor_assign_1_stride_0, update = v_cache_67_cast_fp16, x = k_7_to_fp16)[name = string("v_167_internal_tensor_assign_1_cast_fp16")]; tensor concat_374x = const()[name = string("concat_374x"), val = tensor([1, -1, 16, 64])]; tensor var_3661_cast_fp16 = reshape(shape = concat_374x, x = linear_132_cast_fp16)[name = string("op_3661_cast_fp16")]; tensor const_186_to_fp16 = const()[name = string("const_186_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_135_cast_fp16 = mul(x = var_3661_cast_fp16, y = const_186_to_fp16)[name = string("q_135_cast_fp16")]; tensor var_3667 = const()[name = string("op_3667"), val = tensor([1, 1500, 16, -1])]; tensor var_3668_cast_fp16 = reshape(shape = var_3667, x = k_167_internal_tensor_assign_1_cast_fp16)[name = string("op_3668_cast_fp16")]; tensor const_187_to_fp16 = const()[name = string("const_187_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_169_cast_fp16 = mul(x = var_3668_cast_fp16, y = const_187_to_fp16)[name = string("k_169_cast_fp16")]; tensor var_3674 = const()[name = string("op_3674"), val = tensor([1, 1500, 16, -1])]; tensor var_3675_cast_fp16 = reshape(shape = var_3674, x = v_167_internal_tensor_assign_1_cast_fp16)[name = string("op_3675_cast_fp16")]; tensor var_3676 = const()[name = string("op_3676"), val = tensor([0, 2, 1, 3])]; bool qk_101_transpose_x_0 = const()[name = string("qk_101_transpose_x_0"), val = bool(false)]; bool qk_101_transpose_y_0 = const()[name = string("qk_101_transpose_y_0"), val = bool(false)]; tensor transpose_259_perm_0 = const()[name = string("transpose_259_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_260_perm_0 = const()[name = string("transpose_260_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_260 = transpose(perm = transpose_260_perm_0, x = k_169_cast_fp16)[name = string("transpose_346")]; tensor transpose_259 = transpose(perm = transpose_259_perm_0, x = q_135_cast_fp16)[name = string("transpose_347")]; tensor qk_101_cast_fp16 = matmul(transpose_x = qk_101_transpose_x_0, transpose_y = qk_101_transpose_y_0, x = transpose_259, y = transpose_260)[name = string("qk_101_cast_fp16")]; tensor var_3680_cast_fp16 = softmax(axis = var_3524, x = qk_101_cast_fp16)[name = string("op_3680_cast_fp16")]; bool var_3682_transpose_x_0 = const()[name = string("op_3682_transpose_x_0"), val = bool(false)]; bool var_3682_transpose_y_0 = const()[name = string("op_3682_transpose_y_0"), val = bool(false)]; tensor v_169_cast_fp16 = transpose(perm = var_3676, x = var_3675_cast_fp16)[name = string("transpose_348")]; tensor var_3682_cast_fp16 = matmul(transpose_x = var_3682_transpose_x_0, transpose_y = var_3682_transpose_y_0, x = var_3680_cast_fp16, y = v_169_cast_fp16)[name = string("op_3682_cast_fp16")]; tensor var_3683 = const()[name = string("op_3683"), val = tensor([0, 2, 1, 3])]; tensor concat_375x = const()[name = string("concat_375x"), val = tensor([1, -1, 1024])]; tensor var_3684_cast_fp16 = transpose(perm = var_3683, x = var_3682_cast_fp16)[name = string("transpose_345")]; tensor x_301_cast_fp16 = reshape(shape = concat_375x, x = var_3684_cast_fp16)[name = string("x_301_cast_fp16")]; tensor var_3688_to_fp16 = const()[name = string("op_3688_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(591423680)))]; tensor var_3689_to_fp16 = const()[name = string("op_3689_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(593520896)))]; tensor linear_133_cast_fp16 = linear(bias = var_3689_to_fp16, weight = var_3688_to_fp16, x = x_301_cast_fp16)[name = string("linear_133_cast_fp16")]; tensor x_303_cast_fp16 = add(x = x_297_cast_fp16, y = linear_133_cast_fp16)[name = string("x_303_cast_fp16")]; tensor var_3696_axes_0 = const()[name = string("op_3696_axes_0"), val = tensor([-1])]; tensor blocks_16_mlp_ln_weight_to_fp16 = const()[name = string("blocks_16_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(593523008)))]; tensor blocks_16_mlp_ln_bias_to_fp16 = const()[name = string("blocks_16_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(593525120)))]; tensor var_3696_cast_fp16 = layer_norm(axes = var_3696_axes_0, beta = blocks_16_mlp_ln_bias_to_fp16, epsilon = var_3530_to_fp16, gamma = blocks_16_mlp_ln_weight_to_fp16, x = x_303_cast_fp16)[name = string("op_3696_cast_fp16")]; tensor var_3705_to_fp16 = const()[name = string("op_3705_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(593527232)))]; tensor var_3706_to_fp16 = const()[name = string("op_3706_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(601915904)))]; tensor linear_134_cast_fp16 = linear(bias = var_3706_to_fp16, weight = var_3705_to_fp16, x = var_3696_cast_fp16)[name = string("linear_134_cast_fp16")]; string x_307_mode_0 = const()[name = string("x_307_mode_0"), val = string("EXACT")]; tensor x_307_cast_fp16 = gelu(mode = x_307_mode_0, x = linear_134_cast_fp16)[name = string("x_307_cast_fp16")]; tensor var_3711_to_fp16 = const()[name = string("op_3711_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(601924160)))]; tensor var_3712_to_fp16 = const()[name = string("op_3712_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(610312832)))]; tensor linear_135_cast_fp16 = linear(bias = var_3712_to_fp16, weight = var_3711_to_fp16, x = x_307_cast_fp16)[name = string("linear_135_cast_fp16")]; tensor x_309_cast_fp16 = add(x = x_303_cast_fp16, y = linear_135_cast_fp16)[name = string("x_309_cast_fp16")]; tensor k_cache_69_begin_0 = const()[name = string("k_cache_69_begin_0"), val = tensor([17, 0, 0, 0])]; tensor k_cache_69_end_0 = const()[name = string("k_cache_69_end_0"), val = tensor([18, 1, 448, 1024])]; tensor k_cache_69_end_mask_0 = const()[name = string("k_cache_69_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_69_squeeze_mask_0 = const()[name = string("k_cache_69_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_69_cast_fp16 = slice_by_index(begin = k_cache_69_begin_0, end = k_cache_69_end_0, end_mask = k_cache_69_end_mask_0, squeeze_mask = k_cache_69_squeeze_mask_0, x = coreml_update_state_80)[name = string("k_cache_69_cast_fp16")]; tensor v_cache_69_begin_0 = const()[name = string("v_cache_69_begin_0"), val = tensor([17, 0, 0, 0])]; tensor v_cache_69_end_0 = const()[name = string("v_cache_69_end_0"), val = tensor([18, 1, 448, 1024])]; tensor v_cache_69_end_mask_0 = const()[name = string("v_cache_69_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_69_squeeze_mask_0 = const()[name = string("v_cache_69_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_69_cast_fp16 = slice_by_index(begin = v_cache_69_begin_0, end = v_cache_69_end_0, end_mask = v_cache_69_end_mask_0, squeeze_mask = v_cache_69_squeeze_mask_0, x = coreml_update_state_81)[name = string("v_cache_69_cast_fp16")]; tensor k_cache_71_begin_0 = const()[name = string("k_cache_71_begin_0"), val = tensor([17, 0, 0, 0])]; tensor k_cache_71_end_0 = const()[name = string("k_cache_71_end_0"), val = tensor([18, 1, 1500, 1024])]; tensor k_cache_71_end_mask_0 = const()[name = string("k_cache_71_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_71_squeeze_mask_0 = const()[name = string("k_cache_71_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_71_cast_fp16 = slice_by_index(begin = k_cache_71_begin_0, end = k_cache_71_end_0, end_mask = k_cache_71_end_mask_0, squeeze_mask = k_cache_71_squeeze_mask_0, x = read_state_2)[name = string("k_cache_71_cast_fp16")]; tensor v_cache_71_begin_0 = const()[name = string("v_cache_71_begin_0"), val = tensor([17, 0, 0, 0])]; tensor v_cache_71_end_0 = const()[name = string("v_cache_71_end_0"), val = tensor([18, 1, 1500, 1024])]; tensor v_cache_71_end_mask_0 = const()[name = string("v_cache_71_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_71_squeeze_mask_0 = const()[name = string("v_cache_71_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_71_cast_fp16 = slice_by_index(begin = v_cache_71_begin_0, end = v_cache_71_end_0, end_mask = v_cache_71_end_mask_0, squeeze_mask = v_cache_71_squeeze_mask_0, x = read_state_3)[name = string("v_cache_71_cast_fp16")]; int32 var_3735 = const()[name = string("op_3735"), val = int32(-1)]; tensor var_3753_axes_0 = const()[name = string("op_3753_axes_0"), val = tensor([-1])]; tensor blocks_17_attn_ln_weight_to_fp16 = const()[name = string("blocks_17_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(610314944)))]; tensor blocks_17_attn_ln_bias_to_fp16 = const()[name = string("blocks_17_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(610317056)))]; fp16 var_3741_to_fp16 = const()[name = string("op_3741_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_3753_cast_fp16 = layer_norm(axes = var_3753_axes_0, beta = blocks_17_attn_ln_bias_to_fp16, epsilon = var_3741_to_fp16, gamma = blocks_17_attn_ln_weight_to_fp16, x = x_309_cast_fp16)[name = string("op_3753_cast_fp16")]; tensor var_3764_to_fp16 = const()[name = string("op_3764_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(610319168)))]; tensor var_3765_to_fp16 = const()[name = string("op_3765_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(612416384)))]; tensor linear_136_cast_fp16 = linear(bias = var_3765_to_fp16, weight = var_3764_to_fp16, x = var_3753_cast_fp16)[name = string("linear_136_cast_fp16")]; tensor var_3768_to_fp16 = const()[name = string("op_3768_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(612418496)))]; tensor linear_137_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3768_to_fp16, x = var_3753_cast_fp16)[name = string("linear_137_cast_fp16")]; tensor var_3772_to_fp16 = const()[name = string("op_3772_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(614515712)))]; tensor var_3773_to_fp16 = const()[name = string("op_3773_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(616612928)))]; tensor linear_138_cast_fp16 = linear(bias = var_3773_to_fp16, weight = var_3772_to_fp16, x = var_3753_cast_fp16)[name = string("linear_138_cast_fp16")]; tensor var_3775_shape_cast_fp16 = shape(x = linear_136_cast_fp16)[name = string("op_3775_shape_cast_fp16")]; int32 gather_206_axis_0 = const()[name = string("gather_206_axis_0"), val = int32(0)]; int32 gather_206_batch_dims_0 = const()[name = string("gather_206_batch_dims_0"), val = int32(0)]; bool gather_206_validate_indices_0 = const()[name = string("gather_206_validate_indices_0"), val = bool(false)]; string var_3775_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3775_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_206_to_uint16 = const()[name = string("select_206_to_uint16"), val = uint16(1)]; tensor var_3775_shape_cast_fp16_to_uint16 = cast(dtype = var_3775_shape_cast_fp16_to_uint16_dtype_0, x = var_3775_shape_cast_fp16)[name = string("cast_260")]; uint16 gather_206_cast_uint16 = gather(axis = gather_206_axis_0, batch_dims = gather_206_batch_dims_0, indices = select_206_to_uint16, validate_indices = gather_206_validate_indices_0, x = var_3775_shape_cast_fp16_to_uint16)[name = string("gather_206_cast_uint16")]; string gather_206_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_206_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_206_cast_uint16_to_int32 = cast(dtype = gather_206_cast_uint16_to_int32_dtype_0, x = gather_206_cast_uint16)[name = string("cast_259")]; int32 end_step_37 = add(x = offset, y = gather_206_cast_uint16_to_int32)[name = string("end_step_37")]; tensor expand_dims_272 = const()[name = string("expand_dims_272"), val = tensor([0])]; tensor expand_dims_274 = const()[name = string("expand_dims_274"), val = tensor([0])]; tensor expand_dims_275_axes_0 = const()[name = string("expand_dims_275_axes_0"), val = tensor([0])]; tensor expand_dims_275 = expand_dims(axes = expand_dims_275_axes_0, x = end_step_37)[name = string("expand_dims_275")]; tensor concat_378_values0_0 = const()[name = string("concat_378_values0_0"), val = tensor([17])]; int32 concat_378_axis_0 = const()[name = string("concat_378_axis_0"), val = int32(0)]; bool concat_378_interleave_0 = const()[name = string("concat_378_interleave_0"), val = bool(false)]; tensor concat_378 = concat(axis = concat_378_axis_0, interleave = concat_378_interleave_0, values = (concat_378_values0_0, expand_dims_272, expand_dims_1, expand_dims_274))[name = string("concat_378")]; tensor concat_379_values0_0 = const()[name = string("concat_379_values0_0"), val = tensor([0])]; tensor concat_379_values1_0 = const()[name = string("concat_379_values1_0"), val = tensor([0])]; tensor concat_379_values3_0 = const()[name = string("concat_379_values3_0"), val = tensor([0])]; int32 concat_379_axis_0 = const()[name = string("concat_379_axis_0"), val = int32(0)]; bool concat_379_interleave_0 = const()[name = string("concat_379_interleave_0"), val = bool(false)]; tensor concat_379 = concat(axis = concat_379_axis_0, interleave = concat_379_interleave_0, values = (concat_379_values0_0, concat_379_values1_0, expand_dims_275, concat_379_values3_0))[name = string("concat_379")]; tensor k_cache1_internal_tensor_assign_18_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_18_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_18_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_378, begin_mask = k_cache1_internal_tensor_assign_18_begin_mask_0, end = concat_379, end_mask = k_cache1_internal_tensor_assign_18_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_18_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_18_stride_0, update = linear_137_cast_fp16, x = coreml_update_state_80)[name = string("k_cache1_internal_tensor_assign_18_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_18_cast_fp16, input = k_cache1)[name = string("coreml_update_state_82_write_state")]; tensor coreml_update_state_82 = read_state(input = k_cache1)[name = string("coreml_update_state_82")]; tensor v_cache1_internal_tensor_assign_18_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_18_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_18_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_378, begin_mask = v_cache1_internal_tensor_assign_18_begin_mask_0, end = concat_379, end_mask = v_cache1_internal_tensor_assign_18_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_18_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_18_stride_0, update = linear_138_cast_fp16, x = coreml_update_state_81)[name = string("v_cache1_internal_tensor_assign_18_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_18_cast_fp16, input = v_cache1)[name = string("coreml_update_state_83_write_state")]; tensor coreml_update_state_83 = read_state(input = v_cache1)[name = string("coreml_update_state_83")]; int32 concat_384_values0_0 = const()[name = string("concat_384_values0_0"), val = int32(1)]; int32 concat_384_values2_0 = const()[name = string("concat_384_values2_0"), val = int32(1024)]; int32 concat_384_axis_0 = const()[name = string("concat_384_axis_0"), val = int32(0)]; bool concat_384_interleave_0 = const()[name = string("concat_384_interleave_0"), val = bool(false)]; tensor concat_384 = concat(axis = concat_384_axis_0, interleave = concat_384_interleave_0, values = (concat_384_values0_0, end_step_37, concat_384_values2_0))[name = string("concat_384")]; tensor var_3791_begin_0 = const()[name = string("op_3791_begin_0"), val = tensor([0, 0, 0])]; tensor var_3791_end_mask_0 = const()[name = string("op_3791_end_mask_0"), val = tensor([true, false, true])]; tensor var_3791_cast_fp16 = slice_by_index(begin = var_3791_begin_0, end = concat_384, end_mask = var_3791_end_mask_0, x = k_cache_69_cast_fp16)[name = string("op_3791_cast_fp16")]; tensor var_3794_begin_0 = const()[name = string("op_3794_begin_0"), val = tensor([0, 0, 0])]; tensor var_3794_end_mask_0 = const()[name = string("op_3794_end_mask_0"), val = tensor([true, false, true])]; tensor var_3794_cast_fp16 = slice_by_index(begin = var_3794_begin_0, end = concat_384, end_mask = var_3794_end_mask_0, x = v_cache_69_cast_fp16)[name = string("op_3794_cast_fp16")]; tensor concat_386x = const()[name = string("concat_386x"), val = tensor([1, -1, 16, 64])]; tensor var_3804_cast_fp16 = reshape(shape = concat_386x, x = linear_136_cast_fp16)[name = string("op_3804_cast_fp16")]; tensor const_188_to_fp16 = const()[name = string("const_188_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_139_cast_fp16 = mul(x = var_3804_cast_fp16, y = const_188_to_fp16)[name = string("q_139_cast_fp16")]; tensor concat_387x = const()[name = string("concat_387x"), val = tensor([1, -1, 16, 64])]; tensor var_3811_cast_fp16 = reshape(shape = concat_387x, x = var_3791_cast_fp16)[name = string("op_3811_cast_fp16")]; tensor const_189_to_fp16 = const()[name = string("const_189_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_175_cast_fp16 = mul(x = var_3811_cast_fp16, y = const_189_to_fp16)[name = string("k_175_cast_fp16")]; tensor concat_388x = const()[name = string("concat_388x"), val = tensor([1, -1, 16, 64])]; tensor var_3818_cast_fp16 = reshape(shape = concat_388x, x = var_3794_cast_fp16)[name = string("op_3818_cast_fp16")]; tensor var_3819 = const()[name = string("op_3819"), val = tensor([0, 2, 1, 3])]; bool qk_103_transpose_x_0 = const()[name = string("qk_103_transpose_x_0"), val = bool(false)]; bool qk_103_transpose_y_0 = const()[name = string("qk_103_transpose_y_0"), val = bool(false)]; tensor transpose_261_perm_0 = const()[name = string("transpose_261_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_262_perm_0 = const()[name = string("transpose_262_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_262 = transpose(perm = transpose_262_perm_0, x = k_175_cast_fp16)[name = string("transpose_342")]; tensor transpose_261 = transpose(perm = transpose_261_perm_0, x = q_139_cast_fp16)[name = string("transpose_343")]; tensor qk_103_cast_fp16 = matmul(transpose_x = qk_103_transpose_x_0, transpose_y = qk_103_transpose_y_0, x = transpose_261, y = transpose_262)[name = string("qk_103_cast_fp16")]; int32 concat_389_values1_0 = const()[name = string("concat_389_values1_0"), val = int32(448)]; int32 concat_389_axis_0 = const()[name = string("concat_389_axis_0"), val = int32(0)]; bool concat_389_interleave_0 = const()[name = string("concat_389_interleave_0"), val = bool(false)]; tensor concat_389 = concat(axis = concat_389_axis_0, interleave = concat_389_interleave_0, values = (gather_206_cast_uint16_to_int32, concat_389_values1_0))[name = string("concat_389")]; tensor var_3822_begin_0 = const()[name = string("op_3822_begin_0"), val = tensor([0, 0])]; tensor var_3822_end_mask_0 = const()[name = string("op_3822_end_mask_0"), val = tensor([false, true])]; tensor var_3822_cast_fp16 = slice_by_index(begin = var_3822_begin_0, end = concat_389, end_mask = var_3822_end_mask_0, x = mask_to_fp16)[name = string("op_3822_cast_fp16")]; int32 concat_390_values0_0 = const()[name = string("concat_390_values0_0"), val = int32(0)]; int32 concat_390_axis_0 = const()[name = string("concat_390_axis_0"), val = int32(0)]; bool concat_390_interleave_0 = const()[name = string("concat_390_interleave_0"), val = bool(false)]; tensor concat_390 = concat(axis = concat_390_axis_0, interleave = concat_390_interleave_0, values = (concat_390_values0_0, gather_206_cast_uint16_to_int32))[name = string("concat_390")]; tensor var_3823_begin_0 = const()[name = string("op_3823_begin_0"), val = tensor([0, 0])]; tensor var_3823_end_mask_0 = const()[name = string("op_3823_end_mask_0"), val = tensor([true, false])]; tensor var_3823_cast_fp16 = slice_by_index(begin = var_3823_begin_0, end = concat_390, end_mask = var_3823_end_mask_0, x = var_3822_cast_fp16)[name = string("op_3823_cast_fp16")]; tensor qk_105_cast_fp16 = add(x = qk_103_cast_fp16, y = var_3823_cast_fp16)[name = string("qk_105_cast_fp16")]; tensor var_3826_cast_fp16 = softmax(axis = var_3735, x = qk_105_cast_fp16)[name = string("op_3826_cast_fp16")]; bool var_3828_transpose_x_0 = const()[name = string("op_3828_transpose_x_0"), val = bool(false)]; bool var_3828_transpose_y_0 = const()[name = string("op_3828_transpose_y_0"), val = bool(false)]; tensor v_175_cast_fp16 = transpose(perm = var_3819, x = var_3818_cast_fp16)[name = string("transpose_344")]; tensor var_3828_cast_fp16 = matmul(transpose_x = var_3828_transpose_x_0, transpose_y = var_3828_transpose_y_0, x = var_3826_cast_fp16, y = v_175_cast_fp16)[name = string("op_3828_cast_fp16")]; tensor var_3829 = const()[name = string("op_3829"), val = tensor([0, 2, 1, 3])]; tensor concat_391x = const()[name = string("concat_391x"), val = tensor([1, -1, 1024])]; tensor var_3830_cast_fp16 = transpose(perm = var_3829, x = var_3828_cast_fp16)[name = string("transpose_341")]; tensor x_313_cast_fp16 = reshape(shape = concat_391x, x = var_3830_cast_fp16)[name = string("x_313_cast_fp16")]; tensor var_3834_to_fp16 = const()[name = string("op_3834_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(616615040)))]; tensor var_3835_to_fp16 = const()[name = string("op_3835_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(618712256)))]; tensor linear_139_cast_fp16 = linear(bias = var_3835_to_fp16, weight = var_3834_to_fp16, x = x_313_cast_fp16)[name = string("linear_139_cast_fp16")]; tensor x_315_cast_fp16 = add(x = x_309_cast_fp16, y = linear_139_cast_fp16)[name = string("x_315_cast_fp16")]; tensor var_3842_axes_0 = const()[name = string("op_3842_axes_0"), val = tensor([-1])]; tensor blocks_17_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_17_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(618714368)))]; tensor blocks_17_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_17_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(618716480)))]; tensor var_3842_cast_fp16 = layer_norm(axes = var_3842_axes_0, beta = blocks_17_cross_attn_ln_bias_to_fp16, epsilon = var_3741_to_fp16, gamma = blocks_17_cross_attn_ln_weight_to_fp16, x = x_315_cast_fp16)[name = string("op_3842_cast_fp16")]; tensor var_3851_to_fp16 = const()[name = string("op_3851_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(618718592)))]; tensor var_3852_to_fp16 = const()[name = string("op_3852_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(620815808)))]; tensor linear_140_cast_fp16 = linear(bias = var_3852_to_fp16, weight = var_3851_to_fp16, x = var_3842_cast_fp16)[name = string("linear_140_cast_fp16")]; tensor concat_392 = const()[name = string("concat_392"), val = tensor([0, 0, 0])]; tensor concat_393 = const()[name = string("concat_393"), val = tensor([0, 1500, 0])]; tensor k_177_internal_tensor_assign_1_stride_0 = const()[name = string("k_177_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_177_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_177_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_177_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_177_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_177_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_177_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_177_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_392, begin_mask = k_177_internal_tensor_assign_1_begin_mask_0, end = concat_393, end_mask = k_177_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_177_internal_tensor_assign_1_squeeze_mask_0, stride = k_177_internal_tensor_assign_1_stride_0, update = k_cache_71_cast_fp16, x = k_7_to_fp16)[name = string("k_177_internal_tensor_assign_1_cast_fp16")]; tensor concat_394 = const()[name = string("concat_394"), val = tensor([0, 0, 0])]; tensor concat_395 = const()[name = string("concat_395"), val = tensor([0, 1500, 0])]; tensor v_177_internal_tensor_assign_1_stride_0 = const()[name = string("v_177_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_177_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_177_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_177_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_177_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_177_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_177_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_177_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_394, begin_mask = v_177_internal_tensor_assign_1_begin_mask_0, end = concat_395, end_mask = v_177_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_177_internal_tensor_assign_1_squeeze_mask_0, stride = v_177_internal_tensor_assign_1_stride_0, update = v_cache_71_cast_fp16, x = k_7_to_fp16)[name = string("v_177_internal_tensor_assign_1_cast_fp16")]; tensor concat_396x = const()[name = string("concat_396x"), val = tensor([1, -1, 16, 64])]; tensor var_3872_cast_fp16 = reshape(shape = concat_396x, x = linear_140_cast_fp16)[name = string("op_3872_cast_fp16")]; tensor const_190_to_fp16 = const()[name = string("const_190_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_143_cast_fp16 = mul(x = var_3872_cast_fp16, y = const_190_to_fp16)[name = string("q_143_cast_fp16")]; tensor var_3878 = const()[name = string("op_3878"), val = tensor([1, 1500, 16, -1])]; tensor var_3879_cast_fp16 = reshape(shape = var_3878, x = k_177_internal_tensor_assign_1_cast_fp16)[name = string("op_3879_cast_fp16")]; tensor const_191_to_fp16 = const()[name = string("const_191_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_179_cast_fp16 = mul(x = var_3879_cast_fp16, y = const_191_to_fp16)[name = string("k_179_cast_fp16")]; tensor var_3885 = const()[name = string("op_3885"), val = tensor([1, 1500, 16, -1])]; tensor var_3886_cast_fp16 = reshape(shape = var_3885, x = v_177_internal_tensor_assign_1_cast_fp16)[name = string("op_3886_cast_fp16")]; tensor var_3887 = const()[name = string("op_3887"), val = tensor([0, 2, 1, 3])]; bool qk_107_transpose_x_0 = const()[name = string("qk_107_transpose_x_0"), val = bool(false)]; bool qk_107_transpose_y_0 = const()[name = string("qk_107_transpose_y_0"), val = bool(false)]; tensor transpose_263_perm_0 = const()[name = string("transpose_263_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_264_perm_0 = const()[name = string("transpose_264_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_264 = transpose(perm = transpose_264_perm_0, x = k_179_cast_fp16)[name = string("transpose_338")]; tensor transpose_263 = transpose(perm = transpose_263_perm_0, x = q_143_cast_fp16)[name = string("transpose_339")]; tensor qk_107_cast_fp16 = matmul(transpose_x = qk_107_transpose_x_0, transpose_y = qk_107_transpose_y_0, x = transpose_263, y = transpose_264)[name = string("qk_107_cast_fp16")]; tensor var_3891_cast_fp16 = softmax(axis = var_3735, x = qk_107_cast_fp16)[name = string("op_3891_cast_fp16")]; bool var_3893_transpose_x_0 = const()[name = string("op_3893_transpose_x_0"), val = bool(false)]; bool var_3893_transpose_y_0 = const()[name = string("op_3893_transpose_y_0"), val = bool(false)]; tensor v_179_cast_fp16 = transpose(perm = var_3887, x = var_3886_cast_fp16)[name = string("transpose_340")]; tensor var_3893_cast_fp16 = matmul(transpose_x = var_3893_transpose_x_0, transpose_y = var_3893_transpose_y_0, x = var_3891_cast_fp16, y = v_179_cast_fp16)[name = string("op_3893_cast_fp16")]; tensor var_3894 = const()[name = string("op_3894"), val = tensor([0, 2, 1, 3])]; tensor concat_397x = const()[name = string("concat_397x"), val = tensor([1, -1, 1024])]; tensor var_3895_cast_fp16 = transpose(perm = var_3894, x = var_3893_cast_fp16)[name = string("transpose_337")]; tensor x_319_cast_fp16 = reshape(shape = concat_397x, x = var_3895_cast_fp16)[name = string("x_319_cast_fp16")]; tensor var_3899_to_fp16 = const()[name = string("op_3899_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(620817920)))]; tensor var_3900_to_fp16 = const()[name = string("op_3900_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(622915136)))]; tensor linear_141_cast_fp16 = linear(bias = var_3900_to_fp16, weight = var_3899_to_fp16, x = x_319_cast_fp16)[name = string("linear_141_cast_fp16")]; tensor x_321_cast_fp16 = add(x = x_315_cast_fp16, y = linear_141_cast_fp16)[name = string("x_321_cast_fp16")]; tensor var_3907_axes_0 = const()[name = string("op_3907_axes_0"), val = tensor([-1])]; tensor blocks_17_mlp_ln_weight_to_fp16 = const()[name = string("blocks_17_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(622917248)))]; tensor blocks_17_mlp_ln_bias_to_fp16 = const()[name = string("blocks_17_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(622919360)))]; tensor var_3907_cast_fp16 = layer_norm(axes = var_3907_axes_0, beta = blocks_17_mlp_ln_bias_to_fp16, epsilon = var_3741_to_fp16, gamma = blocks_17_mlp_ln_weight_to_fp16, x = x_321_cast_fp16)[name = string("op_3907_cast_fp16")]; tensor var_3916_to_fp16 = const()[name = string("op_3916_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(622921472)))]; tensor var_3917_to_fp16 = const()[name = string("op_3917_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(631310144)))]; tensor linear_142_cast_fp16 = linear(bias = var_3917_to_fp16, weight = var_3916_to_fp16, x = var_3907_cast_fp16)[name = string("linear_142_cast_fp16")]; string x_325_mode_0 = const()[name = string("x_325_mode_0"), val = string("EXACT")]; tensor x_325_cast_fp16 = gelu(mode = x_325_mode_0, x = linear_142_cast_fp16)[name = string("x_325_cast_fp16")]; tensor var_3922_to_fp16 = const()[name = string("op_3922_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(631318400)))]; tensor var_3923_to_fp16 = const()[name = string("op_3923_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(639707072)))]; tensor linear_143_cast_fp16 = linear(bias = var_3923_to_fp16, weight = var_3922_to_fp16, x = x_325_cast_fp16)[name = string("linear_143_cast_fp16")]; tensor x_327_cast_fp16 = add(x = x_321_cast_fp16, y = linear_143_cast_fp16)[name = string("x_327_cast_fp16")]; tensor k_cache_73_begin_0 = const()[name = string("k_cache_73_begin_0"), val = tensor([18, 0, 0, 0])]; tensor k_cache_73_end_0 = const()[name = string("k_cache_73_end_0"), val = tensor([19, 1, 448, 1024])]; tensor k_cache_73_end_mask_0 = const()[name = string("k_cache_73_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_73_squeeze_mask_0 = const()[name = string("k_cache_73_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_73_cast_fp16 = slice_by_index(begin = k_cache_73_begin_0, end = k_cache_73_end_0, end_mask = k_cache_73_end_mask_0, squeeze_mask = k_cache_73_squeeze_mask_0, x = coreml_update_state_82)[name = string("k_cache_73_cast_fp16")]; tensor v_cache_73_begin_0 = const()[name = string("v_cache_73_begin_0"), val = tensor([18, 0, 0, 0])]; tensor v_cache_73_end_0 = const()[name = string("v_cache_73_end_0"), val = tensor([19, 1, 448, 1024])]; tensor v_cache_73_end_mask_0 = const()[name = string("v_cache_73_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_73_squeeze_mask_0 = const()[name = string("v_cache_73_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_73_cast_fp16 = slice_by_index(begin = v_cache_73_begin_0, end = v_cache_73_end_0, end_mask = v_cache_73_end_mask_0, squeeze_mask = v_cache_73_squeeze_mask_0, x = coreml_update_state_83)[name = string("v_cache_73_cast_fp16")]; tensor k_cache_75_begin_0 = const()[name = string("k_cache_75_begin_0"), val = tensor([18, 0, 0, 0])]; tensor k_cache_75_end_0 = const()[name = string("k_cache_75_end_0"), val = tensor([19, 1, 1500, 1024])]; tensor k_cache_75_end_mask_0 = const()[name = string("k_cache_75_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_75_squeeze_mask_0 = const()[name = string("k_cache_75_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_75_cast_fp16 = slice_by_index(begin = k_cache_75_begin_0, end = k_cache_75_end_0, end_mask = k_cache_75_end_mask_0, squeeze_mask = k_cache_75_squeeze_mask_0, x = read_state_2)[name = string("k_cache_75_cast_fp16")]; tensor v_cache_75_begin_0 = const()[name = string("v_cache_75_begin_0"), val = tensor([18, 0, 0, 0])]; tensor v_cache_75_end_0 = const()[name = string("v_cache_75_end_0"), val = tensor([19, 1, 1500, 1024])]; tensor v_cache_75_end_mask_0 = const()[name = string("v_cache_75_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_75_squeeze_mask_0 = const()[name = string("v_cache_75_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_75_cast_fp16 = slice_by_index(begin = v_cache_75_begin_0, end = v_cache_75_end_0, end_mask = v_cache_75_end_mask_0, squeeze_mask = v_cache_75_squeeze_mask_0, x = read_state_3)[name = string("v_cache_75_cast_fp16")]; int32 var_3946 = const()[name = string("op_3946"), val = int32(-1)]; tensor var_3964_axes_0 = const()[name = string("op_3964_axes_0"), val = tensor([-1])]; tensor blocks_18_attn_ln_weight_to_fp16 = const()[name = string("blocks_18_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(639709184)))]; tensor blocks_18_attn_ln_bias_to_fp16 = const()[name = string("blocks_18_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(639711296)))]; fp16 var_3952_to_fp16 = const()[name = string("op_3952_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_3964_cast_fp16 = layer_norm(axes = var_3964_axes_0, beta = blocks_18_attn_ln_bias_to_fp16, epsilon = var_3952_to_fp16, gamma = blocks_18_attn_ln_weight_to_fp16, x = x_327_cast_fp16)[name = string("op_3964_cast_fp16")]; tensor var_3975_to_fp16 = const()[name = string("op_3975_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(639713408)))]; tensor var_3976_to_fp16 = const()[name = string("op_3976_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(641810624)))]; tensor linear_144_cast_fp16 = linear(bias = var_3976_to_fp16, weight = var_3975_to_fp16, x = var_3964_cast_fp16)[name = string("linear_144_cast_fp16")]; tensor var_3979_to_fp16 = const()[name = string("op_3979_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(641812736)))]; tensor linear_145_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3979_to_fp16, x = var_3964_cast_fp16)[name = string("linear_145_cast_fp16")]; tensor var_3983_to_fp16 = const()[name = string("op_3983_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(643909952)))]; tensor var_3984_to_fp16 = const()[name = string("op_3984_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(646007168)))]; tensor linear_146_cast_fp16 = linear(bias = var_3984_to_fp16, weight = var_3983_to_fp16, x = var_3964_cast_fp16)[name = string("linear_146_cast_fp16")]; tensor var_3986_shape_cast_fp16 = shape(x = linear_144_cast_fp16)[name = string("op_3986_shape_cast_fp16")]; int32 gather_218_axis_0 = const()[name = string("gather_218_axis_0"), val = int32(0)]; int32 gather_218_batch_dims_0 = const()[name = string("gather_218_batch_dims_0"), val = int32(0)]; bool gather_218_validate_indices_0 = const()[name = string("gather_218_validate_indices_0"), val = bool(false)]; string var_3986_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3986_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_218_to_uint16 = const()[name = string("select_218_to_uint16"), val = uint16(1)]; tensor var_3986_shape_cast_fp16_to_uint16 = cast(dtype = var_3986_shape_cast_fp16_to_uint16_dtype_0, x = var_3986_shape_cast_fp16)[name = string("cast_258")]; uint16 gather_218_cast_uint16 = gather(axis = gather_218_axis_0, batch_dims = gather_218_batch_dims_0, indices = select_218_to_uint16, validate_indices = gather_218_validate_indices_0, x = var_3986_shape_cast_fp16_to_uint16)[name = string("gather_218_cast_uint16")]; string gather_218_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_218_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_218_cast_uint16_to_int32 = cast(dtype = gather_218_cast_uint16_to_int32_dtype_0, x = gather_218_cast_uint16)[name = string("cast_257")]; int32 end_step_39 = add(x = offset, y = gather_218_cast_uint16_to_int32)[name = string("end_step_39")]; tensor expand_dims_288 = const()[name = string("expand_dims_288"), val = tensor([0])]; tensor expand_dims_290 = const()[name = string("expand_dims_290"), val = tensor([0])]; tensor expand_dims_291_axes_0 = const()[name = string("expand_dims_291_axes_0"), val = tensor([0])]; tensor expand_dims_291 = expand_dims(axes = expand_dims_291_axes_0, x = end_step_39)[name = string("expand_dims_291")]; tensor concat_400_values0_0 = const()[name = string("concat_400_values0_0"), val = tensor([18])]; int32 concat_400_axis_0 = const()[name = string("concat_400_axis_0"), val = int32(0)]; bool concat_400_interleave_0 = const()[name = string("concat_400_interleave_0"), val = bool(false)]; tensor concat_400 = concat(axis = concat_400_axis_0, interleave = concat_400_interleave_0, values = (concat_400_values0_0, expand_dims_288, expand_dims_1, expand_dims_290))[name = string("concat_400")]; tensor concat_401_values0_0 = const()[name = string("concat_401_values0_0"), val = tensor([0])]; tensor concat_401_values1_0 = const()[name = string("concat_401_values1_0"), val = tensor([0])]; tensor concat_401_values3_0 = const()[name = string("concat_401_values3_0"), val = tensor([0])]; int32 concat_401_axis_0 = const()[name = string("concat_401_axis_0"), val = int32(0)]; bool concat_401_interleave_0 = const()[name = string("concat_401_interleave_0"), val = bool(false)]; tensor concat_401 = concat(axis = concat_401_axis_0, interleave = concat_401_interleave_0, values = (concat_401_values0_0, concat_401_values1_0, expand_dims_291, concat_401_values3_0))[name = string("concat_401")]; tensor k_cache1_internal_tensor_assign_19_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_19_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_19_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_400, begin_mask = k_cache1_internal_tensor_assign_19_begin_mask_0, end = concat_401, end_mask = k_cache1_internal_tensor_assign_19_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_19_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_19_stride_0, update = linear_145_cast_fp16, x = coreml_update_state_82)[name = string("k_cache1_internal_tensor_assign_19_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_19_cast_fp16, input = k_cache1)[name = string("coreml_update_state_84_write_state")]; tensor coreml_update_state_84 = read_state(input = k_cache1)[name = string("coreml_update_state_84")]; tensor v_cache1_internal_tensor_assign_19_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_19_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_19_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_400, begin_mask = v_cache1_internal_tensor_assign_19_begin_mask_0, end = concat_401, end_mask = v_cache1_internal_tensor_assign_19_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_19_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_19_stride_0, update = linear_146_cast_fp16, x = coreml_update_state_83)[name = string("v_cache1_internal_tensor_assign_19_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_19_cast_fp16, input = v_cache1)[name = string("coreml_update_state_85_write_state")]; tensor coreml_update_state_85 = read_state(input = v_cache1)[name = string("coreml_update_state_85")]; int32 concat_406_values0_0 = const()[name = string("concat_406_values0_0"), val = int32(1)]; int32 concat_406_values2_0 = const()[name = string("concat_406_values2_0"), val = int32(1024)]; int32 concat_406_axis_0 = const()[name = string("concat_406_axis_0"), val = int32(0)]; bool concat_406_interleave_0 = const()[name = string("concat_406_interleave_0"), val = bool(false)]; tensor concat_406 = concat(axis = concat_406_axis_0, interleave = concat_406_interleave_0, values = (concat_406_values0_0, end_step_39, concat_406_values2_0))[name = string("concat_406")]; tensor var_4002_begin_0 = const()[name = string("op_4002_begin_0"), val = tensor([0, 0, 0])]; tensor var_4002_end_mask_0 = const()[name = string("op_4002_end_mask_0"), val = tensor([true, false, true])]; tensor var_4002_cast_fp16 = slice_by_index(begin = var_4002_begin_0, end = concat_406, end_mask = var_4002_end_mask_0, x = k_cache_73_cast_fp16)[name = string("op_4002_cast_fp16")]; tensor var_4005_begin_0 = const()[name = string("op_4005_begin_0"), val = tensor([0, 0, 0])]; tensor var_4005_end_mask_0 = const()[name = string("op_4005_end_mask_0"), val = tensor([true, false, true])]; tensor var_4005_cast_fp16 = slice_by_index(begin = var_4005_begin_0, end = concat_406, end_mask = var_4005_end_mask_0, x = v_cache_73_cast_fp16)[name = string("op_4005_cast_fp16")]; tensor concat_408x = const()[name = string("concat_408x"), val = tensor([1, -1, 16, 64])]; tensor var_4015_cast_fp16 = reshape(shape = concat_408x, x = linear_144_cast_fp16)[name = string("op_4015_cast_fp16")]; tensor const_192_to_fp16 = const()[name = string("const_192_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_147_cast_fp16 = mul(x = var_4015_cast_fp16, y = const_192_to_fp16)[name = string("q_147_cast_fp16")]; tensor concat_409x = const()[name = string("concat_409x"), val = tensor([1, -1, 16, 64])]; tensor var_4022_cast_fp16 = reshape(shape = concat_409x, x = var_4002_cast_fp16)[name = string("op_4022_cast_fp16")]; tensor const_193_to_fp16 = const()[name = string("const_193_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_185_cast_fp16 = mul(x = var_4022_cast_fp16, y = const_193_to_fp16)[name = string("k_185_cast_fp16")]; tensor concat_410x = const()[name = string("concat_410x"), val = tensor([1, -1, 16, 64])]; tensor var_4029_cast_fp16 = reshape(shape = concat_410x, x = var_4005_cast_fp16)[name = string("op_4029_cast_fp16")]; tensor var_4030 = const()[name = string("op_4030"), val = tensor([0, 2, 1, 3])]; bool qk_109_transpose_x_0 = const()[name = string("qk_109_transpose_x_0"), val = bool(false)]; bool qk_109_transpose_y_0 = const()[name = string("qk_109_transpose_y_0"), val = bool(false)]; tensor transpose_265_perm_0 = const()[name = string("transpose_265_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_266_perm_0 = const()[name = string("transpose_266_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_266 = transpose(perm = transpose_266_perm_0, x = k_185_cast_fp16)[name = string("transpose_334")]; tensor transpose_265 = transpose(perm = transpose_265_perm_0, x = q_147_cast_fp16)[name = string("transpose_335")]; tensor qk_109_cast_fp16 = matmul(transpose_x = qk_109_transpose_x_0, transpose_y = qk_109_transpose_y_0, x = transpose_265, y = transpose_266)[name = string("qk_109_cast_fp16")]; int32 concat_411_values1_0 = const()[name = string("concat_411_values1_0"), val = int32(448)]; int32 concat_411_axis_0 = const()[name = string("concat_411_axis_0"), val = int32(0)]; bool concat_411_interleave_0 = const()[name = string("concat_411_interleave_0"), val = bool(false)]; tensor concat_411 = concat(axis = concat_411_axis_0, interleave = concat_411_interleave_0, values = (gather_218_cast_uint16_to_int32, concat_411_values1_0))[name = string("concat_411")]; tensor var_4033_begin_0 = const()[name = string("op_4033_begin_0"), val = tensor([0, 0])]; tensor var_4033_end_mask_0 = const()[name = string("op_4033_end_mask_0"), val = tensor([false, true])]; tensor var_4033_cast_fp16 = slice_by_index(begin = var_4033_begin_0, end = concat_411, end_mask = var_4033_end_mask_0, x = mask_to_fp16)[name = string("op_4033_cast_fp16")]; int32 concat_412_values0_0 = const()[name = string("concat_412_values0_0"), val = int32(0)]; int32 concat_412_axis_0 = const()[name = string("concat_412_axis_0"), val = int32(0)]; bool concat_412_interleave_0 = const()[name = string("concat_412_interleave_0"), val = bool(false)]; tensor concat_412 = concat(axis = concat_412_axis_0, interleave = concat_412_interleave_0, values = (concat_412_values0_0, gather_218_cast_uint16_to_int32))[name = string("concat_412")]; tensor var_4034_begin_0 = const()[name = string("op_4034_begin_0"), val = tensor([0, 0])]; tensor var_4034_end_mask_0 = const()[name = string("op_4034_end_mask_0"), val = tensor([true, false])]; tensor var_4034_cast_fp16 = slice_by_index(begin = var_4034_begin_0, end = concat_412, end_mask = var_4034_end_mask_0, x = var_4033_cast_fp16)[name = string("op_4034_cast_fp16")]; tensor qk_111_cast_fp16 = add(x = qk_109_cast_fp16, y = var_4034_cast_fp16)[name = string("qk_111_cast_fp16")]; tensor var_4037_cast_fp16 = softmax(axis = var_3946, x = qk_111_cast_fp16)[name = string("op_4037_cast_fp16")]; bool var_4039_transpose_x_0 = const()[name = string("op_4039_transpose_x_0"), val = bool(false)]; bool var_4039_transpose_y_0 = const()[name = string("op_4039_transpose_y_0"), val = bool(false)]; tensor v_185_cast_fp16 = transpose(perm = var_4030, x = var_4029_cast_fp16)[name = string("transpose_336")]; tensor var_4039_cast_fp16 = matmul(transpose_x = var_4039_transpose_x_0, transpose_y = var_4039_transpose_y_0, x = var_4037_cast_fp16, y = v_185_cast_fp16)[name = string("op_4039_cast_fp16")]; tensor var_4040 = const()[name = string("op_4040"), val = tensor([0, 2, 1, 3])]; tensor concat_413x = const()[name = string("concat_413x"), val = tensor([1, -1, 1024])]; tensor var_4041_cast_fp16 = transpose(perm = var_4040, x = var_4039_cast_fp16)[name = string("transpose_333")]; tensor x_331_cast_fp16 = reshape(shape = concat_413x, x = var_4041_cast_fp16)[name = string("x_331_cast_fp16")]; tensor var_4045_to_fp16 = const()[name = string("op_4045_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(646009280)))]; tensor var_4046_to_fp16 = const()[name = string("op_4046_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(648106496)))]; tensor linear_147_cast_fp16 = linear(bias = var_4046_to_fp16, weight = var_4045_to_fp16, x = x_331_cast_fp16)[name = string("linear_147_cast_fp16")]; tensor x_333_cast_fp16 = add(x = x_327_cast_fp16, y = linear_147_cast_fp16)[name = string("x_333_cast_fp16")]; tensor var_4053_axes_0 = const()[name = string("op_4053_axes_0"), val = tensor([-1])]; tensor blocks_18_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_18_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(648108608)))]; tensor blocks_18_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_18_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(648110720)))]; tensor var_4053_cast_fp16 = layer_norm(axes = var_4053_axes_0, beta = blocks_18_cross_attn_ln_bias_to_fp16, epsilon = var_3952_to_fp16, gamma = blocks_18_cross_attn_ln_weight_to_fp16, x = x_333_cast_fp16)[name = string("op_4053_cast_fp16")]; tensor var_4062_to_fp16 = const()[name = string("op_4062_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(648112832)))]; tensor var_4063_to_fp16 = const()[name = string("op_4063_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(650210048)))]; tensor linear_148_cast_fp16 = linear(bias = var_4063_to_fp16, weight = var_4062_to_fp16, x = var_4053_cast_fp16)[name = string("linear_148_cast_fp16")]; tensor concat_414 = const()[name = string("concat_414"), val = tensor([0, 0, 0])]; tensor concat_415 = const()[name = string("concat_415"), val = tensor([0, 1500, 0])]; tensor k_187_internal_tensor_assign_1_stride_0 = const()[name = string("k_187_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_187_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_187_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_187_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_187_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_187_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_187_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_187_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_414, begin_mask = k_187_internal_tensor_assign_1_begin_mask_0, end = concat_415, end_mask = k_187_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_187_internal_tensor_assign_1_squeeze_mask_0, stride = k_187_internal_tensor_assign_1_stride_0, update = k_cache_75_cast_fp16, x = k_7_to_fp16)[name = string("k_187_internal_tensor_assign_1_cast_fp16")]; tensor concat_416 = const()[name = string("concat_416"), val = tensor([0, 0, 0])]; tensor concat_417 = const()[name = string("concat_417"), val = tensor([0, 1500, 0])]; tensor v_187_internal_tensor_assign_1_stride_0 = const()[name = string("v_187_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_187_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_187_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_187_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_187_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_187_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_187_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_187_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_416, begin_mask = v_187_internal_tensor_assign_1_begin_mask_0, end = concat_417, end_mask = v_187_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_187_internal_tensor_assign_1_squeeze_mask_0, stride = v_187_internal_tensor_assign_1_stride_0, update = v_cache_75_cast_fp16, x = k_7_to_fp16)[name = string("v_187_internal_tensor_assign_1_cast_fp16")]; tensor concat_418x = const()[name = string("concat_418x"), val = tensor([1, -1, 16, 64])]; tensor var_4083_cast_fp16 = reshape(shape = concat_418x, x = linear_148_cast_fp16)[name = string("op_4083_cast_fp16")]; tensor const_194_to_fp16 = const()[name = string("const_194_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_151_cast_fp16 = mul(x = var_4083_cast_fp16, y = const_194_to_fp16)[name = string("q_151_cast_fp16")]; tensor var_4089 = const()[name = string("op_4089"), val = tensor([1, 1500, 16, -1])]; tensor var_4090_cast_fp16 = reshape(shape = var_4089, x = k_187_internal_tensor_assign_1_cast_fp16)[name = string("op_4090_cast_fp16")]; tensor const_195_to_fp16 = const()[name = string("const_195_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_189_cast_fp16 = mul(x = var_4090_cast_fp16, y = const_195_to_fp16)[name = string("k_189_cast_fp16")]; tensor var_4096 = const()[name = string("op_4096"), val = tensor([1, 1500, 16, -1])]; tensor var_4097_cast_fp16 = reshape(shape = var_4096, x = v_187_internal_tensor_assign_1_cast_fp16)[name = string("op_4097_cast_fp16")]; tensor var_4098 = const()[name = string("op_4098"), val = tensor([0, 2, 1, 3])]; bool qk_113_transpose_x_0 = const()[name = string("qk_113_transpose_x_0"), val = bool(false)]; bool qk_113_transpose_y_0 = const()[name = string("qk_113_transpose_y_0"), val = bool(false)]; tensor transpose_267_perm_0 = const()[name = string("transpose_267_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_268_perm_0 = const()[name = string("transpose_268_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_268 = transpose(perm = transpose_268_perm_0, x = k_189_cast_fp16)[name = string("transpose_330")]; tensor transpose_267 = transpose(perm = transpose_267_perm_0, x = q_151_cast_fp16)[name = string("transpose_331")]; tensor qk_113_cast_fp16 = matmul(transpose_x = qk_113_transpose_x_0, transpose_y = qk_113_transpose_y_0, x = transpose_267, y = transpose_268)[name = string("qk_113_cast_fp16")]; tensor var_4102_cast_fp16 = softmax(axis = var_3946, x = qk_113_cast_fp16)[name = string("op_4102_cast_fp16")]; bool var_4104_transpose_x_0 = const()[name = string("op_4104_transpose_x_0"), val = bool(false)]; bool var_4104_transpose_y_0 = const()[name = string("op_4104_transpose_y_0"), val = bool(false)]; tensor v_189_cast_fp16 = transpose(perm = var_4098, x = var_4097_cast_fp16)[name = string("transpose_332")]; tensor var_4104_cast_fp16 = matmul(transpose_x = var_4104_transpose_x_0, transpose_y = var_4104_transpose_y_0, x = var_4102_cast_fp16, y = v_189_cast_fp16)[name = string("op_4104_cast_fp16")]; tensor var_4105 = const()[name = string("op_4105"), val = tensor([0, 2, 1, 3])]; tensor concat_419x = const()[name = string("concat_419x"), val = tensor([1, -1, 1024])]; tensor var_4106_cast_fp16 = transpose(perm = var_4105, x = var_4104_cast_fp16)[name = string("transpose_329")]; tensor x_337_cast_fp16 = reshape(shape = concat_419x, x = var_4106_cast_fp16)[name = string("x_337_cast_fp16")]; tensor var_4110_to_fp16 = const()[name = string("op_4110_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(650212160)))]; tensor var_4111_to_fp16 = const()[name = string("op_4111_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(652309376)))]; tensor linear_149_cast_fp16 = linear(bias = var_4111_to_fp16, weight = var_4110_to_fp16, x = x_337_cast_fp16)[name = string("linear_149_cast_fp16")]; tensor x_339_cast_fp16 = add(x = x_333_cast_fp16, y = linear_149_cast_fp16)[name = string("x_339_cast_fp16")]; tensor var_4118_axes_0 = const()[name = string("op_4118_axes_0"), val = tensor([-1])]; tensor blocks_18_mlp_ln_weight_to_fp16 = const()[name = string("blocks_18_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(652311488)))]; tensor blocks_18_mlp_ln_bias_to_fp16 = const()[name = string("blocks_18_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(652313600)))]; tensor var_4118_cast_fp16 = layer_norm(axes = var_4118_axes_0, beta = blocks_18_mlp_ln_bias_to_fp16, epsilon = var_3952_to_fp16, gamma = blocks_18_mlp_ln_weight_to_fp16, x = x_339_cast_fp16)[name = string("op_4118_cast_fp16")]; tensor var_4127_to_fp16 = const()[name = string("op_4127_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(652315712)))]; tensor var_4128_to_fp16 = const()[name = string("op_4128_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(660704384)))]; tensor linear_150_cast_fp16 = linear(bias = var_4128_to_fp16, weight = var_4127_to_fp16, x = var_4118_cast_fp16)[name = string("linear_150_cast_fp16")]; string x_343_mode_0 = const()[name = string("x_343_mode_0"), val = string("EXACT")]; tensor x_343_cast_fp16 = gelu(mode = x_343_mode_0, x = linear_150_cast_fp16)[name = string("x_343_cast_fp16")]; tensor var_4133_to_fp16 = const()[name = string("op_4133_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(660712640)))]; tensor var_4134_to_fp16 = const()[name = string("op_4134_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(669101312)))]; tensor linear_151_cast_fp16 = linear(bias = var_4134_to_fp16, weight = var_4133_to_fp16, x = x_343_cast_fp16)[name = string("linear_151_cast_fp16")]; tensor x_345_cast_fp16 = add(x = x_339_cast_fp16, y = linear_151_cast_fp16)[name = string("x_345_cast_fp16")]; tensor k_cache_77_begin_0 = const()[name = string("k_cache_77_begin_0"), val = tensor([19, 0, 0, 0])]; tensor k_cache_77_end_0 = const()[name = string("k_cache_77_end_0"), val = tensor([20, 1, 448, 1024])]; tensor k_cache_77_end_mask_0 = const()[name = string("k_cache_77_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_77_squeeze_mask_0 = const()[name = string("k_cache_77_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_77_cast_fp16 = slice_by_index(begin = k_cache_77_begin_0, end = k_cache_77_end_0, end_mask = k_cache_77_end_mask_0, squeeze_mask = k_cache_77_squeeze_mask_0, x = coreml_update_state_84)[name = string("k_cache_77_cast_fp16")]; tensor v_cache_77_begin_0 = const()[name = string("v_cache_77_begin_0"), val = tensor([19, 0, 0, 0])]; tensor v_cache_77_end_0 = const()[name = string("v_cache_77_end_0"), val = tensor([20, 1, 448, 1024])]; tensor v_cache_77_end_mask_0 = const()[name = string("v_cache_77_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_77_squeeze_mask_0 = const()[name = string("v_cache_77_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_77_cast_fp16 = slice_by_index(begin = v_cache_77_begin_0, end = v_cache_77_end_0, end_mask = v_cache_77_end_mask_0, squeeze_mask = v_cache_77_squeeze_mask_0, x = coreml_update_state_85)[name = string("v_cache_77_cast_fp16")]; tensor k_cache_79_begin_0 = const()[name = string("k_cache_79_begin_0"), val = tensor([19, 0, 0, 0])]; tensor k_cache_79_end_0 = const()[name = string("k_cache_79_end_0"), val = tensor([20, 1, 1500, 1024])]; tensor k_cache_79_end_mask_0 = const()[name = string("k_cache_79_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_79_squeeze_mask_0 = const()[name = string("k_cache_79_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_79_cast_fp16 = slice_by_index(begin = k_cache_79_begin_0, end = k_cache_79_end_0, end_mask = k_cache_79_end_mask_0, squeeze_mask = k_cache_79_squeeze_mask_0, x = read_state_2)[name = string("k_cache_79_cast_fp16")]; tensor v_cache_79_begin_0 = const()[name = string("v_cache_79_begin_0"), val = tensor([19, 0, 0, 0])]; tensor v_cache_79_end_0 = const()[name = string("v_cache_79_end_0"), val = tensor([20, 1, 1500, 1024])]; tensor v_cache_79_end_mask_0 = const()[name = string("v_cache_79_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_79_squeeze_mask_0 = const()[name = string("v_cache_79_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_79_cast_fp16 = slice_by_index(begin = v_cache_79_begin_0, end = v_cache_79_end_0, end_mask = v_cache_79_end_mask_0, squeeze_mask = v_cache_79_squeeze_mask_0, x = read_state_3)[name = string("v_cache_79_cast_fp16")]; int32 var_4157 = const()[name = string("op_4157"), val = int32(-1)]; tensor var_4175_axes_0 = const()[name = string("op_4175_axes_0"), val = tensor([-1])]; tensor blocks_19_attn_ln_weight_to_fp16 = const()[name = string("blocks_19_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(669103424)))]; tensor blocks_19_attn_ln_bias_to_fp16 = const()[name = string("blocks_19_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(669105536)))]; fp16 var_4163_to_fp16 = const()[name = string("op_4163_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_4175_cast_fp16 = layer_norm(axes = var_4175_axes_0, beta = blocks_19_attn_ln_bias_to_fp16, epsilon = var_4163_to_fp16, gamma = blocks_19_attn_ln_weight_to_fp16, x = x_345_cast_fp16)[name = string("op_4175_cast_fp16")]; tensor var_4186_to_fp16 = const()[name = string("op_4186_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(669107648)))]; tensor var_4187_to_fp16 = const()[name = string("op_4187_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(671204864)))]; tensor linear_152_cast_fp16 = linear(bias = var_4187_to_fp16, weight = var_4186_to_fp16, x = var_4175_cast_fp16)[name = string("linear_152_cast_fp16")]; tensor var_4190_to_fp16 = const()[name = string("op_4190_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(671206976)))]; tensor linear_153_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_4190_to_fp16, x = var_4175_cast_fp16)[name = string("linear_153_cast_fp16")]; tensor var_4194_to_fp16 = const()[name = string("op_4194_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(673304192)))]; tensor var_4195_to_fp16 = const()[name = string("op_4195_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(675401408)))]; tensor linear_154_cast_fp16 = linear(bias = var_4195_to_fp16, weight = var_4194_to_fp16, x = var_4175_cast_fp16)[name = string("linear_154_cast_fp16")]; tensor var_4197_shape_cast_fp16 = shape(x = linear_152_cast_fp16)[name = string("op_4197_shape_cast_fp16")]; int32 gather_230_axis_0 = const()[name = string("gather_230_axis_0"), val = int32(0)]; int32 gather_230_batch_dims_0 = const()[name = string("gather_230_batch_dims_0"), val = int32(0)]; bool gather_230_validate_indices_0 = const()[name = string("gather_230_validate_indices_0"), val = bool(false)]; string var_4197_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4197_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_230_to_uint16 = const()[name = string("select_230_to_uint16"), val = uint16(1)]; tensor var_4197_shape_cast_fp16_to_uint16 = cast(dtype = var_4197_shape_cast_fp16_to_uint16_dtype_0, x = var_4197_shape_cast_fp16)[name = string("cast_256")]; uint16 gather_230_cast_uint16 = gather(axis = gather_230_axis_0, batch_dims = gather_230_batch_dims_0, indices = select_230_to_uint16, validate_indices = gather_230_validate_indices_0, x = var_4197_shape_cast_fp16_to_uint16)[name = string("gather_230_cast_uint16")]; string gather_230_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_230_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_230_cast_uint16_to_int32 = cast(dtype = gather_230_cast_uint16_to_int32_dtype_0, x = gather_230_cast_uint16)[name = string("cast_255")]; int32 end_step_41 = add(x = offset, y = gather_230_cast_uint16_to_int32)[name = string("end_step_41")]; tensor expand_dims_304 = const()[name = string("expand_dims_304"), val = tensor([0])]; tensor expand_dims_306 = const()[name = string("expand_dims_306"), val = tensor([0])]; tensor expand_dims_307_axes_0 = const()[name = string("expand_dims_307_axes_0"), val = tensor([0])]; tensor expand_dims_307 = expand_dims(axes = expand_dims_307_axes_0, x = end_step_41)[name = string("expand_dims_307")]; tensor concat_422_values0_0 = const()[name = string("concat_422_values0_0"), val = tensor([19])]; int32 concat_422_axis_0 = const()[name = string("concat_422_axis_0"), val = int32(0)]; bool concat_422_interleave_0 = const()[name = string("concat_422_interleave_0"), val = bool(false)]; tensor concat_422 = concat(axis = concat_422_axis_0, interleave = concat_422_interleave_0, values = (concat_422_values0_0, expand_dims_304, expand_dims_1, expand_dims_306))[name = string("concat_422")]; tensor concat_423_values0_0 = const()[name = string("concat_423_values0_0"), val = tensor([0])]; tensor concat_423_values1_0 = const()[name = string("concat_423_values1_0"), val = tensor([0])]; tensor concat_423_values3_0 = const()[name = string("concat_423_values3_0"), val = tensor([0])]; int32 concat_423_axis_0 = const()[name = string("concat_423_axis_0"), val = int32(0)]; bool concat_423_interleave_0 = const()[name = string("concat_423_interleave_0"), val = bool(false)]; tensor concat_423 = concat(axis = concat_423_axis_0, interleave = concat_423_interleave_0, values = (concat_423_values0_0, concat_423_values1_0, expand_dims_307, concat_423_values3_0))[name = string("concat_423")]; tensor k_cache1_internal_tensor_assign_20_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_20_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_20_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_422, begin_mask = k_cache1_internal_tensor_assign_20_begin_mask_0, end = concat_423, end_mask = k_cache1_internal_tensor_assign_20_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_20_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_20_stride_0, update = linear_153_cast_fp16, x = coreml_update_state_84)[name = string("k_cache1_internal_tensor_assign_20_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_20_cast_fp16, input = k_cache1)[name = string("coreml_update_state_86_write_state")]; tensor coreml_update_state_86 = read_state(input = k_cache1)[name = string("coreml_update_state_86")]; tensor v_cache1_internal_tensor_assign_20_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_20_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_20_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_422, begin_mask = v_cache1_internal_tensor_assign_20_begin_mask_0, end = concat_423, end_mask = v_cache1_internal_tensor_assign_20_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_20_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_20_stride_0, update = linear_154_cast_fp16, x = coreml_update_state_85)[name = string("v_cache1_internal_tensor_assign_20_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_20_cast_fp16, input = v_cache1)[name = string("coreml_update_state_87_write_state")]; tensor coreml_update_state_87 = read_state(input = v_cache1)[name = string("coreml_update_state_87")]; int32 concat_428_values0_0 = const()[name = string("concat_428_values0_0"), val = int32(1)]; int32 concat_428_values2_0 = const()[name = string("concat_428_values2_0"), val = int32(1024)]; int32 concat_428_axis_0 = const()[name = string("concat_428_axis_0"), val = int32(0)]; bool concat_428_interleave_0 = const()[name = string("concat_428_interleave_0"), val = bool(false)]; tensor concat_428 = concat(axis = concat_428_axis_0, interleave = concat_428_interleave_0, values = (concat_428_values0_0, end_step_41, concat_428_values2_0))[name = string("concat_428")]; tensor var_4213_begin_0 = const()[name = string("op_4213_begin_0"), val = tensor([0, 0, 0])]; tensor var_4213_end_mask_0 = const()[name = string("op_4213_end_mask_0"), val = tensor([true, false, true])]; tensor var_4213_cast_fp16 = slice_by_index(begin = var_4213_begin_0, end = concat_428, end_mask = var_4213_end_mask_0, x = k_cache_77_cast_fp16)[name = string("op_4213_cast_fp16")]; tensor var_4216_begin_0 = const()[name = string("op_4216_begin_0"), val = tensor([0, 0, 0])]; tensor var_4216_end_mask_0 = const()[name = string("op_4216_end_mask_0"), val = tensor([true, false, true])]; tensor var_4216_cast_fp16 = slice_by_index(begin = var_4216_begin_0, end = concat_428, end_mask = var_4216_end_mask_0, x = v_cache_77_cast_fp16)[name = string("op_4216_cast_fp16")]; tensor concat_430x = const()[name = string("concat_430x"), val = tensor([1, -1, 16, 64])]; tensor var_4226_cast_fp16 = reshape(shape = concat_430x, x = linear_152_cast_fp16)[name = string("op_4226_cast_fp16")]; tensor const_196_to_fp16 = const()[name = string("const_196_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_155_cast_fp16 = mul(x = var_4226_cast_fp16, y = const_196_to_fp16)[name = string("q_155_cast_fp16")]; tensor concat_431x = const()[name = string("concat_431x"), val = tensor([1, -1, 16, 64])]; tensor var_4233_cast_fp16 = reshape(shape = concat_431x, x = var_4213_cast_fp16)[name = string("op_4233_cast_fp16")]; tensor const_197_to_fp16 = const()[name = string("const_197_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_195_cast_fp16 = mul(x = var_4233_cast_fp16, y = const_197_to_fp16)[name = string("k_195_cast_fp16")]; tensor concat_432x = const()[name = string("concat_432x"), val = tensor([1, -1, 16, 64])]; tensor var_4240_cast_fp16 = reshape(shape = concat_432x, x = var_4216_cast_fp16)[name = string("op_4240_cast_fp16")]; tensor var_4241 = const()[name = string("op_4241"), val = tensor([0, 2, 1, 3])]; bool qk_115_transpose_x_0 = const()[name = string("qk_115_transpose_x_0"), val = bool(false)]; bool qk_115_transpose_y_0 = const()[name = string("qk_115_transpose_y_0"), val = bool(false)]; tensor transpose_269_perm_0 = const()[name = string("transpose_269_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_270_perm_0 = const()[name = string("transpose_270_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_270 = transpose(perm = transpose_270_perm_0, x = k_195_cast_fp16)[name = string("transpose_326")]; tensor transpose_269 = transpose(perm = transpose_269_perm_0, x = q_155_cast_fp16)[name = string("transpose_327")]; tensor qk_115_cast_fp16 = matmul(transpose_x = qk_115_transpose_x_0, transpose_y = qk_115_transpose_y_0, x = transpose_269, y = transpose_270)[name = string("qk_115_cast_fp16")]; int32 concat_433_values1_0 = const()[name = string("concat_433_values1_0"), val = int32(448)]; int32 concat_433_axis_0 = const()[name = string("concat_433_axis_0"), val = int32(0)]; bool concat_433_interleave_0 = const()[name = string("concat_433_interleave_0"), val = bool(false)]; tensor concat_433 = concat(axis = concat_433_axis_0, interleave = concat_433_interleave_0, values = (gather_230_cast_uint16_to_int32, concat_433_values1_0))[name = string("concat_433")]; tensor var_4244_begin_0 = const()[name = string("op_4244_begin_0"), val = tensor([0, 0])]; tensor var_4244_end_mask_0 = const()[name = string("op_4244_end_mask_0"), val = tensor([false, true])]; tensor var_4244_cast_fp16 = slice_by_index(begin = var_4244_begin_0, end = concat_433, end_mask = var_4244_end_mask_0, x = mask_to_fp16)[name = string("op_4244_cast_fp16")]; int32 concat_434_values0_0 = const()[name = string("concat_434_values0_0"), val = int32(0)]; int32 concat_434_axis_0 = const()[name = string("concat_434_axis_0"), val = int32(0)]; bool concat_434_interleave_0 = const()[name = string("concat_434_interleave_0"), val = bool(false)]; tensor concat_434 = concat(axis = concat_434_axis_0, interleave = concat_434_interleave_0, values = (concat_434_values0_0, gather_230_cast_uint16_to_int32))[name = string("concat_434")]; tensor var_4245_begin_0 = const()[name = string("op_4245_begin_0"), val = tensor([0, 0])]; tensor var_4245_end_mask_0 = const()[name = string("op_4245_end_mask_0"), val = tensor([true, false])]; tensor var_4245_cast_fp16 = slice_by_index(begin = var_4245_begin_0, end = concat_434, end_mask = var_4245_end_mask_0, x = var_4244_cast_fp16)[name = string("op_4245_cast_fp16")]; tensor qk_117_cast_fp16 = add(x = qk_115_cast_fp16, y = var_4245_cast_fp16)[name = string("qk_117_cast_fp16")]; tensor var_4248_cast_fp16 = softmax(axis = var_4157, x = qk_117_cast_fp16)[name = string("op_4248_cast_fp16")]; bool var_4250_transpose_x_0 = const()[name = string("op_4250_transpose_x_0"), val = bool(false)]; bool var_4250_transpose_y_0 = const()[name = string("op_4250_transpose_y_0"), val = bool(false)]; tensor v_195_cast_fp16 = transpose(perm = var_4241, x = var_4240_cast_fp16)[name = string("transpose_328")]; tensor var_4250_cast_fp16 = matmul(transpose_x = var_4250_transpose_x_0, transpose_y = var_4250_transpose_y_0, x = var_4248_cast_fp16, y = v_195_cast_fp16)[name = string("op_4250_cast_fp16")]; tensor var_4251 = const()[name = string("op_4251"), val = tensor([0, 2, 1, 3])]; tensor concat_435x = const()[name = string("concat_435x"), val = tensor([1, -1, 1024])]; tensor var_4252_cast_fp16 = transpose(perm = var_4251, x = var_4250_cast_fp16)[name = string("transpose_325")]; tensor x_349_cast_fp16 = reshape(shape = concat_435x, x = var_4252_cast_fp16)[name = string("x_349_cast_fp16")]; tensor var_4256_to_fp16 = const()[name = string("op_4256_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(675403520)))]; tensor var_4257_to_fp16 = const()[name = string("op_4257_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(677500736)))]; tensor linear_155_cast_fp16 = linear(bias = var_4257_to_fp16, weight = var_4256_to_fp16, x = x_349_cast_fp16)[name = string("linear_155_cast_fp16")]; tensor x_351_cast_fp16 = add(x = x_345_cast_fp16, y = linear_155_cast_fp16)[name = string("x_351_cast_fp16")]; tensor var_4264_axes_0 = const()[name = string("op_4264_axes_0"), val = tensor([-1])]; tensor blocks_19_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_19_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(677502848)))]; tensor blocks_19_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_19_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(677504960)))]; tensor var_4264_cast_fp16 = layer_norm(axes = var_4264_axes_0, beta = blocks_19_cross_attn_ln_bias_to_fp16, epsilon = var_4163_to_fp16, gamma = blocks_19_cross_attn_ln_weight_to_fp16, x = x_351_cast_fp16)[name = string("op_4264_cast_fp16")]; tensor var_4273_to_fp16 = const()[name = string("op_4273_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(677507072)))]; tensor var_4274_to_fp16 = const()[name = string("op_4274_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(679604288)))]; tensor linear_156_cast_fp16 = linear(bias = var_4274_to_fp16, weight = var_4273_to_fp16, x = var_4264_cast_fp16)[name = string("linear_156_cast_fp16")]; tensor concat_436 = const()[name = string("concat_436"), val = tensor([0, 0, 0])]; tensor concat_437 = const()[name = string("concat_437"), val = tensor([0, 1500, 0])]; tensor k_197_internal_tensor_assign_1_stride_0 = const()[name = string("k_197_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_197_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_197_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_197_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_197_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_197_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_197_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_197_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_436, begin_mask = k_197_internal_tensor_assign_1_begin_mask_0, end = concat_437, end_mask = k_197_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_197_internal_tensor_assign_1_squeeze_mask_0, stride = k_197_internal_tensor_assign_1_stride_0, update = k_cache_79_cast_fp16, x = k_7_to_fp16)[name = string("k_197_internal_tensor_assign_1_cast_fp16")]; tensor concat_438 = const()[name = string("concat_438"), val = tensor([0, 0, 0])]; tensor concat_439 = const()[name = string("concat_439"), val = tensor([0, 1500, 0])]; tensor v_197_internal_tensor_assign_1_stride_0 = const()[name = string("v_197_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_197_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_197_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_197_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_197_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_197_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_197_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_197_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_438, begin_mask = v_197_internal_tensor_assign_1_begin_mask_0, end = concat_439, end_mask = v_197_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_197_internal_tensor_assign_1_squeeze_mask_0, stride = v_197_internal_tensor_assign_1_stride_0, update = v_cache_79_cast_fp16, x = k_7_to_fp16)[name = string("v_197_internal_tensor_assign_1_cast_fp16")]; tensor concat_440x = const()[name = string("concat_440x"), val = tensor([1, -1, 16, 64])]; tensor var_4294_cast_fp16 = reshape(shape = concat_440x, x = linear_156_cast_fp16)[name = string("op_4294_cast_fp16")]; tensor const_198_to_fp16 = const()[name = string("const_198_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_159_cast_fp16 = mul(x = var_4294_cast_fp16, y = const_198_to_fp16)[name = string("q_159_cast_fp16")]; tensor var_4300 = const()[name = string("op_4300"), val = tensor([1, 1500, 16, -1])]; tensor var_4301_cast_fp16 = reshape(shape = var_4300, x = k_197_internal_tensor_assign_1_cast_fp16)[name = string("op_4301_cast_fp16")]; tensor const_199_to_fp16 = const()[name = string("const_199_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_199_cast_fp16 = mul(x = var_4301_cast_fp16, y = const_199_to_fp16)[name = string("k_199_cast_fp16")]; tensor var_4307 = const()[name = string("op_4307"), val = tensor([1, 1500, 16, -1])]; tensor var_4308_cast_fp16 = reshape(shape = var_4307, x = v_197_internal_tensor_assign_1_cast_fp16)[name = string("op_4308_cast_fp16")]; tensor var_4309 = const()[name = string("op_4309"), val = tensor([0, 2, 1, 3])]; bool qk_119_transpose_x_0 = const()[name = string("qk_119_transpose_x_0"), val = bool(false)]; bool qk_119_transpose_y_0 = const()[name = string("qk_119_transpose_y_0"), val = bool(false)]; tensor transpose_271_perm_0 = const()[name = string("transpose_271_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_272_perm_0 = const()[name = string("transpose_272_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_272 = transpose(perm = transpose_272_perm_0, x = k_199_cast_fp16)[name = string("transpose_322")]; tensor transpose_271 = transpose(perm = transpose_271_perm_0, x = q_159_cast_fp16)[name = string("transpose_323")]; tensor qk_119_cast_fp16 = matmul(transpose_x = qk_119_transpose_x_0, transpose_y = qk_119_transpose_y_0, x = transpose_271, y = transpose_272)[name = string("qk_119_cast_fp16")]; tensor var_4313_cast_fp16 = softmax(axis = var_4157, x = qk_119_cast_fp16)[name = string("op_4313_cast_fp16")]; bool var_4315_transpose_x_0 = const()[name = string("op_4315_transpose_x_0"), val = bool(false)]; bool var_4315_transpose_y_0 = const()[name = string("op_4315_transpose_y_0"), val = bool(false)]; tensor v_199_cast_fp16 = transpose(perm = var_4309, x = var_4308_cast_fp16)[name = string("transpose_324")]; tensor var_4315_cast_fp16 = matmul(transpose_x = var_4315_transpose_x_0, transpose_y = var_4315_transpose_y_0, x = var_4313_cast_fp16, y = v_199_cast_fp16)[name = string("op_4315_cast_fp16")]; tensor var_4316 = const()[name = string("op_4316"), val = tensor([0, 2, 1, 3])]; tensor concat_441x = const()[name = string("concat_441x"), val = tensor([1, -1, 1024])]; tensor var_4317_cast_fp16 = transpose(perm = var_4316, x = var_4315_cast_fp16)[name = string("transpose_321")]; tensor x_355_cast_fp16 = reshape(shape = concat_441x, x = var_4317_cast_fp16)[name = string("x_355_cast_fp16")]; tensor var_4321_to_fp16 = const()[name = string("op_4321_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(679606400)))]; tensor var_4322_to_fp16 = const()[name = string("op_4322_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(681703616)))]; tensor linear_157_cast_fp16 = linear(bias = var_4322_to_fp16, weight = var_4321_to_fp16, x = x_355_cast_fp16)[name = string("linear_157_cast_fp16")]; tensor x_357_cast_fp16 = add(x = x_351_cast_fp16, y = linear_157_cast_fp16)[name = string("x_357_cast_fp16")]; tensor var_4329_axes_0 = const()[name = string("op_4329_axes_0"), val = tensor([-1])]; tensor blocks_19_mlp_ln_weight_to_fp16 = const()[name = string("blocks_19_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(681705728)))]; tensor blocks_19_mlp_ln_bias_to_fp16 = const()[name = string("blocks_19_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(681707840)))]; tensor var_4329_cast_fp16 = layer_norm(axes = var_4329_axes_0, beta = blocks_19_mlp_ln_bias_to_fp16, epsilon = var_4163_to_fp16, gamma = blocks_19_mlp_ln_weight_to_fp16, x = x_357_cast_fp16)[name = string("op_4329_cast_fp16")]; tensor var_4338_to_fp16 = const()[name = string("op_4338_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(681709952)))]; tensor var_4339_to_fp16 = const()[name = string("op_4339_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(690098624)))]; tensor linear_158_cast_fp16 = linear(bias = var_4339_to_fp16, weight = var_4338_to_fp16, x = var_4329_cast_fp16)[name = string("linear_158_cast_fp16")]; string x_361_mode_0 = const()[name = string("x_361_mode_0"), val = string("EXACT")]; tensor x_361_cast_fp16 = gelu(mode = x_361_mode_0, x = linear_158_cast_fp16)[name = string("x_361_cast_fp16")]; tensor var_4344_to_fp16 = const()[name = string("op_4344_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(690106880)))]; tensor var_4345_to_fp16 = const()[name = string("op_4345_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698495552)))]; tensor linear_159_cast_fp16 = linear(bias = var_4345_to_fp16, weight = var_4344_to_fp16, x = x_361_cast_fp16)[name = string("linear_159_cast_fp16")]; tensor x_363_cast_fp16 = add(x = x_357_cast_fp16, y = linear_159_cast_fp16)[name = string("x_363_cast_fp16")]; tensor k_cache_81_begin_0 = const()[name = string("k_cache_81_begin_0"), val = tensor([20, 0, 0, 0])]; tensor k_cache_81_end_0 = const()[name = string("k_cache_81_end_0"), val = tensor([21, 1, 448, 1024])]; tensor k_cache_81_end_mask_0 = const()[name = string("k_cache_81_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_81_squeeze_mask_0 = const()[name = string("k_cache_81_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_81_cast_fp16 = slice_by_index(begin = k_cache_81_begin_0, end = k_cache_81_end_0, end_mask = k_cache_81_end_mask_0, squeeze_mask = k_cache_81_squeeze_mask_0, x = coreml_update_state_86)[name = string("k_cache_81_cast_fp16")]; tensor v_cache_81_begin_0 = const()[name = string("v_cache_81_begin_0"), val = tensor([20, 0, 0, 0])]; tensor v_cache_81_end_0 = const()[name = string("v_cache_81_end_0"), val = tensor([21, 1, 448, 1024])]; tensor v_cache_81_end_mask_0 = const()[name = string("v_cache_81_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_81_squeeze_mask_0 = const()[name = string("v_cache_81_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_81_cast_fp16 = slice_by_index(begin = v_cache_81_begin_0, end = v_cache_81_end_0, end_mask = v_cache_81_end_mask_0, squeeze_mask = v_cache_81_squeeze_mask_0, x = coreml_update_state_87)[name = string("v_cache_81_cast_fp16")]; tensor k_cache_83_begin_0 = const()[name = string("k_cache_83_begin_0"), val = tensor([20, 0, 0, 0])]; tensor k_cache_83_end_0 = const()[name = string("k_cache_83_end_0"), val = tensor([21, 1, 1500, 1024])]; tensor k_cache_83_end_mask_0 = const()[name = string("k_cache_83_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_83_squeeze_mask_0 = const()[name = string("k_cache_83_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_83_cast_fp16 = slice_by_index(begin = k_cache_83_begin_0, end = k_cache_83_end_0, end_mask = k_cache_83_end_mask_0, squeeze_mask = k_cache_83_squeeze_mask_0, x = read_state_2)[name = string("k_cache_83_cast_fp16")]; tensor v_cache_83_begin_0 = const()[name = string("v_cache_83_begin_0"), val = tensor([20, 0, 0, 0])]; tensor v_cache_83_end_0 = const()[name = string("v_cache_83_end_0"), val = tensor([21, 1, 1500, 1024])]; tensor v_cache_83_end_mask_0 = const()[name = string("v_cache_83_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_83_squeeze_mask_0 = const()[name = string("v_cache_83_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_83_cast_fp16 = slice_by_index(begin = v_cache_83_begin_0, end = v_cache_83_end_0, end_mask = v_cache_83_end_mask_0, squeeze_mask = v_cache_83_squeeze_mask_0, x = read_state_3)[name = string("v_cache_83_cast_fp16")]; int32 var_4368 = const()[name = string("op_4368"), val = int32(-1)]; tensor var_4386_axes_0 = const()[name = string("op_4386_axes_0"), val = tensor([-1])]; tensor blocks_20_attn_ln_weight_to_fp16 = const()[name = string("blocks_20_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698497664)))]; tensor blocks_20_attn_ln_bias_to_fp16 = const()[name = string("blocks_20_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698499776)))]; fp16 var_4374_to_fp16 = const()[name = string("op_4374_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_4386_cast_fp16 = layer_norm(axes = var_4386_axes_0, beta = blocks_20_attn_ln_bias_to_fp16, epsilon = var_4374_to_fp16, gamma = blocks_20_attn_ln_weight_to_fp16, x = x_363_cast_fp16)[name = string("op_4386_cast_fp16")]; tensor var_4397_to_fp16 = const()[name = string("op_4397_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698501888)))]; tensor var_4398_to_fp16 = const()[name = string("op_4398_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700599104)))]; tensor linear_160_cast_fp16 = linear(bias = var_4398_to_fp16, weight = var_4397_to_fp16, x = var_4386_cast_fp16)[name = string("linear_160_cast_fp16")]; tensor var_4401_to_fp16 = const()[name = string("op_4401_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700601216)))]; tensor linear_161_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_4401_to_fp16, x = var_4386_cast_fp16)[name = string("linear_161_cast_fp16")]; tensor var_4405_to_fp16 = const()[name = string("op_4405_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(702698432)))]; tensor var_4406_to_fp16 = const()[name = string("op_4406_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(704795648)))]; tensor linear_162_cast_fp16 = linear(bias = var_4406_to_fp16, weight = var_4405_to_fp16, x = var_4386_cast_fp16)[name = string("linear_162_cast_fp16")]; tensor var_4408_shape_cast_fp16 = shape(x = linear_160_cast_fp16)[name = string("op_4408_shape_cast_fp16")]; int32 gather_242_axis_0 = const()[name = string("gather_242_axis_0"), val = int32(0)]; int32 gather_242_batch_dims_0 = const()[name = string("gather_242_batch_dims_0"), val = int32(0)]; bool gather_242_validate_indices_0 = const()[name = string("gather_242_validate_indices_0"), val = bool(false)]; string var_4408_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4408_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_242_to_uint16 = const()[name = string("select_242_to_uint16"), val = uint16(1)]; tensor var_4408_shape_cast_fp16_to_uint16 = cast(dtype = var_4408_shape_cast_fp16_to_uint16_dtype_0, x = var_4408_shape_cast_fp16)[name = string("cast_254")]; uint16 gather_242_cast_uint16 = gather(axis = gather_242_axis_0, batch_dims = gather_242_batch_dims_0, indices = select_242_to_uint16, validate_indices = gather_242_validate_indices_0, x = var_4408_shape_cast_fp16_to_uint16)[name = string("gather_242_cast_uint16")]; string gather_242_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_242_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_242_cast_uint16_to_int32 = cast(dtype = gather_242_cast_uint16_to_int32_dtype_0, x = gather_242_cast_uint16)[name = string("cast_253")]; int32 end_step_43 = add(x = offset, y = gather_242_cast_uint16_to_int32)[name = string("end_step_43")]; tensor expand_dims_320 = const()[name = string("expand_dims_320"), val = tensor([0])]; tensor expand_dims_322 = const()[name = string("expand_dims_322"), val = tensor([0])]; tensor expand_dims_323_axes_0 = const()[name = string("expand_dims_323_axes_0"), val = tensor([0])]; tensor expand_dims_323 = expand_dims(axes = expand_dims_323_axes_0, x = end_step_43)[name = string("expand_dims_323")]; tensor concat_444_values0_0 = const()[name = string("concat_444_values0_0"), val = tensor([20])]; int32 concat_444_axis_0 = const()[name = string("concat_444_axis_0"), val = int32(0)]; bool concat_444_interleave_0 = const()[name = string("concat_444_interleave_0"), val = bool(false)]; tensor concat_444 = concat(axis = concat_444_axis_0, interleave = concat_444_interleave_0, values = (concat_444_values0_0, expand_dims_320, expand_dims_1, expand_dims_322))[name = string("concat_444")]; tensor concat_445_values0_0 = const()[name = string("concat_445_values0_0"), val = tensor([0])]; tensor concat_445_values1_0 = const()[name = string("concat_445_values1_0"), val = tensor([0])]; tensor concat_445_values3_0 = const()[name = string("concat_445_values3_0"), val = tensor([0])]; int32 concat_445_axis_0 = const()[name = string("concat_445_axis_0"), val = int32(0)]; bool concat_445_interleave_0 = const()[name = string("concat_445_interleave_0"), val = bool(false)]; tensor concat_445 = concat(axis = concat_445_axis_0, interleave = concat_445_interleave_0, values = (concat_445_values0_0, concat_445_values1_0, expand_dims_323, concat_445_values3_0))[name = string("concat_445")]; tensor k_cache1_internal_tensor_assign_21_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_21_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_21_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_444, begin_mask = k_cache1_internal_tensor_assign_21_begin_mask_0, end = concat_445, end_mask = k_cache1_internal_tensor_assign_21_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_21_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_21_stride_0, update = linear_161_cast_fp16, x = coreml_update_state_86)[name = string("k_cache1_internal_tensor_assign_21_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_21_cast_fp16, input = k_cache1)[name = string("coreml_update_state_88_write_state")]; tensor coreml_update_state_88 = read_state(input = k_cache1)[name = string("coreml_update_state_88")]; tensor v_cache1_internal_tensor_assign_21_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_21_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_21_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_444, begin_mask = v_cache1_internal_tensor_assign_21_begin_mask_0, end = concat_445, end_mask = v_cache1_internal_tensor_assign_21_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_21_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_21_stride_0, update = linear_162_cast_fp16, x = coreml_update_state_87)[name = string("v_cache1_internal_tensor_assign_21_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_21_cast_fp16, input = v_cache1)[name = string("coreml_update_state_89_write_state")]; tensor coreml_update_state_89 = read_state(input = v_cache1)[name = string("coreml_update_state_89")]; int32 concat_450_values0_0 = const()[name = string("concat_450_values0_0"), val = int32(1)]; int32 concat_450_values2_0 = const()[name = string("concat_450_values2_0"), val = int32(1024)]; int32 concat_450_axis_0 = const()[name = string("concat_450_axis_0"), val = int32(0)]; bool concat_450_interleave_0 = const()[name = string("concat_450_interleave_0"), val = bool(false)]; tensor concat_450 = concat(axis = concat_450_axis_0, interleave = concat_450_interleave_0, values = (concat_450_values0_0, end_step_43, concat_450_values2_0))[name = string("concat_450")]; tensor var_4424_begin_0 = const()[name = string("op_4424_begin_0"), val = tensor([0, 0, 0])]; tensor var_4424_end_mask_0 = const()[name = string("op_4424_end_mask_0"), val = tensor([true, false, true])]; tensor var_4424_cast_fp16 = slice_by_index(begin = var_4424_begin_0, end = concat_450, end_mask = var_4424_end_mask_0, x = k_cache_81_cast_fp16)[name = string("op_4424_cast_fp16")]; tensor var_4427_begin_0 = const()[name = string("op_4427_begin_0"), val = tensor([0, 0, 0])]; tensor var_4427_end_mask_0 = const()[name = string("op_4427_end_mask_0"), val = tensor([true, false, true])]; tensor var_4427_cast_fp16 = slice_by_index(begin = var_4427_begin_0, end = concat_450, end_mask = var_4427_end_mask_0, x = v_cache_81_cast_fp16)[name = string("op_4427_cast_fp16")]; tensor concat_452x = const()[name = string("concat_452x"), val = tensor([1, -1, 16, 64])]; tensor var_4437_cast_fp16 = reshape(shape = concat_452x, x = linear_160_cast_fp16)[name = string("op_4437_cast_fp16")]; tensor const_200_to_fp16 = const()[name = string("const_200_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_163_cast_fp16 = mul(x = var_4437_cast_fp16, y = const_200_to_fp16)[name = string("q_163_cast_fp16")]; tensor concat_453x = const()[name = string("concat_453x"), val = tensor([1, -1, 16, 64])]; tensor var_4444_cast_fp16 = reshape(shape = concat_453x, x = var_4424_cast_fp16)[name = string("op_4444_cast_fp16")]; tensor const_201_to_fp16 = const()[name = string("const_201_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_205_cast_fp16 = mul(x = var_4444_cast_fp16, y = const_201_to_fp16)[name = string("k_205_cast_fp16")]; tensor concat_454x = const()[name = string("concat_454x"), val = tensor([1, -1, 16, 64])]; tensor var_4451_cast_fp16 = reshape(shape = concat_454x, x = var_4427_cast_fp16)[name = string("op_4451_cast_fp16")]; tensor var_4452 = const()[name = string("op_4452"), val = tensor([0, 2, 1, 3])]; bool qk_121_transpose_x_0 = const()[name = string("qk_121_transpose_x_0"), val = bool(false)]; bool qk_121_transpose_y_0 = const()[name = string("qk_121_transpose_y_0"), val = bool(false)]; tensor transpose_273_perm_0 = const()[name = string("transpose_273_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_274_perm_0 = const()[name = string("transpose_274_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_274 = transpose(perm = transpose_274_perm_0, x = k_205_cast_fp16)[name = string("transpose_318")]; tensor transpose_273 = transpose(perm = transpose_273_perm_0, x = q_163_cast_fp16)[name = string("transpose_319")]; tensor qk_121_cast_fp16 = matmul(transpose_x = qk_121_transpose_x_0, transpose_y = qk_121_transpose_y_0, x = transpose_273, y = transpose_274)[name = string("qk_121_cast_fp16")]; int32 concat_455_values1_0 = const()[name = string("concat_455_values1_0"), val = int32(448)]; int32 concat_455_axis_0 = const()[name = string("concat_455_axis_0"), val = int32(0)]; bool concat_455_interleave_0 = const()[name = string("concat_455_interleave_0"), val = bool(false)]; tensor concat_455 = concat(axis = concat_455_axis_0, interleave = concat_455_interleave_0, values = (gather_242_cast_uint16_to_int32, concat_455_values1_0))[name = string("concat_455")]; tensor var_4455_begin_0 = const()[name = string("op_4455_begin_0"), val = tensor([0, 0])]; tensor var_4455_end_mask_0 = const()[name = string("op_4455_end_mask_0"), val = tensor([false, true])]; tensor var_4455_cast_fp16 = slice_by_index(begin = var_4455_begin_0, end = concat_455, end_mask = var_4455_end_mask_0, x = mask_to_fp16)[name = string("op_4455_cast_fp16")]; int32 concat_456_values0_0 = const()[name = string("concat_456_values0_0"), val = int32(0)]; int32 concat_456_axis_0 = const()[name = string("concat_456_axis_0"), val = int32(0)]; bool concat_456_interleave_0 = const()[name = string("concat_456_interleave_0"), val = bool(false)]; tensor concat_456 = concat(axis = concat_456_axis_0, interleave = concat_456_interleave_0, values = (concat_456_values0_0, gather_242_cast_uint16_to_int32))[name = string("concat_456")]; tensor var_4456_begin_0 = const()[name = string("op_4456_begin_0"), val = tensor([0, 0])]; tensor var_4456_end_mask_0 = const()[name = string("op_4456_end_mask_0"), val = tensor([true, false])]; tensor var_4456_cast_fp16 = slice_by_index(begin = var_4456_begin_0, end = concat_456, end_mask = var_4456_end_mask_0, x = var_4455_cast_fp16)[name = string("op_4456_cast_fp16")]; tensor qk_123_cast_fp16 = add(x = qk_121_cast_fp16, y = var_4456_cast_fp16)[name = string("qk_123_cast_fp16")]; tensor var_4459_cast_fp16 = softmax(axis = var_4368, x = qk_123_cast_fp16)[name = string("op_4459_cast_fp16")]; bool var_4461_transpose_x_0 = const()[name = string("op_4461_transpose_x_0"), val = bool(false)]; bool var_4461_transpose_y_0 = const()[name = string("op_4461_transpose_y_0"), val = bool(false)]; tensor v_205_cast_fp16 = transpose(perm = var_4452, x = var_4451_cast_fp16)[name = string("transpose_320")]; tensor var_4461_cast_fp16 = matmul(transpose_x = var_4461_transpose_x_0, transpose_y = var_4461_transpose_y_0, x = var_4459_cast_fp16, y = v_205_cast_fp16)[name = string("op_4461_cast_fp16")]; tensor var_4462 = const()[name = string("op_4462"), val = tensor([0, 2, 1, 3])]; tensor concat_457x = const()[name = string("concat_457x"), val = tensor([1, -1, 1024])]; tensor var_4463_cast_fp16 = transpose(perm = var_4462, x = var_4461_cast_fp16)[name = string("transpose_317")]; tensor x_367_cast_fp16 = reshape(shape = concat_457x, x = var_4463_cast_fp16)[name = string("x_367_cast_fp16")]; tensor var_4467_to_fp16 = const()[name = string("op_4467_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(704797760)))]; tensor var_4468_to_fp16 = const()[name = string("op_4468_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(706894976)))]; tensor linear_163_cast_fp16 = linear(bias = var_4468_to_fp16, weight = var_4467_to_fp16, x = x_367_cast_fp16)[name = string("linear_163_cast_fp16")]; tensor x_369_cast_fp16 = add(x = x_363_cast_fp16, y = linear_163_cast_fp16)[name = string("x_369_cast_fp16")]; tensor var_4475_axes_0 = const()[name = string("op_4475_axes_0"), val = tensor([-1])]; tensor blocks_20_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_20_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(706897088)))]; tensor blocks_20_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_20_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(706899200)))]; tensor var_4475_cast_fp16 = layer_norm(axes = var_4475_axes_0, beta = blocks_20_cross_attn_ln_bias_to_fp16, epsilon = var_4374_to_fp16, gamma = blocks_20_cross_attn_ln_weight_to_fp16, x = x_369_cast_fp16)[name = string("op_4475_cast_fp16")]; tensor var_4484_to_fp16 = const()[name = string("op_4484_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(706901312)))]; tensor var_4485_to_fp16 = const()[name = string("op_4485_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(708998528)))]; tensor linear_164_cast_fp16 = linear(bias = var_4485_to_fp16, weight = var_4484_to_fp16, x = var_4475_cast_fp16)[name = string("linear_164_cast_fp16")]; tensor concat_458 = const()[name = string("concat_458"), val = tensor([0, 0, 0])]; tensor concat_459 = const()[name = string("concat_459"), val = tensor([0, 1500, 0])]; tensor k_207_internal_tensor_assign_1_stride_0 = const()[name = string("k_207_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_207_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_207_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_207_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_207_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_207_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_207_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_207_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_458, begin_mask = k_207_internal_tensor_assign_1_begin_mask_0, end = concat_459, end_mask = k_207_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_207_internal_tensor_assign_1_squeeze_mask_0, stride = k_207_internal_tensor_assign_1_stride_0, update = k_cache_83_cast_fp16, x = k_7_to_fp16)[name = string("k_207_internal_tensor_assign_1_cast_fp16")]; tensor concat_460 = const()[name = string("concat_460"), val = tensor([0, 0, 0])]; tensor concat_461 = const()[name = string("concat_461"), val = tensor([0, 1500, 0])]; tensor v_207_internal_tensor_assign_1_stride_0 = const()[name = string("v_207_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_207_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_207_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_207_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_207_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_207_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_207_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_207_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_460, begin_mask = v_207_internal_tensor_assign_1_begin_mask_0, end = concat_461, end_mask = v_207_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_207_internal_tensor_assign_1_squeeze_mask_0, stride = v_207_internal_tensor_assign_1_stride_0, update = v_cache_83_cast_fp16, x = k_7_to_fp16)[name = string("v_207_internal_tensor_assign_1_cast_fp16")]; tensor concat_462x = const()[name = string("concat_462x"), val = tensor([1, -1, 16, 64])]; tensor var_4505_cast_fp16 = reshape(shape = concat_462x, x = linear_164_cast_fp16)[name = string("op_4505_cast_fp16")]; tensor const_202_to_fp16 = const()[name = string("const_202_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_167_cast_fp16 = mul(x = var_4505_cast_fp16, y = const_202_to_fp16)[name = string("q_167_cast_fp16")]; tensor var_4511 = const()[name = string("op_4511"), val = tensor([1, 1500, 16, -1])]; tensor var_4512_cast_fp16 = reshape(shape = var_4511, x = k_207_internal_tensor_assign_1_cast_fp16)[name = string("op_4512_cast_fp16")]; tensor const_203_to_fp16 = const()[name = string("const_203_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_209_cast_fp16 = mul(x = var_4512_cast_fp16, y = const_203_to_fp16)[name = string("k_209_cast_fp16")]; tensor var_4518 = const()[name = string("op_4518"), val = tensor([1, 1500, 16, -1])]; tensor var_4519_cast_fp16 = reshape(shape = var_4518, x = v_207_internal_tensor_assign_1_cast_fp16)[name = string("op_4519_cast_fp16")]; tensor var_4520 = const()[name = string("op_4520"), val = tensor([0, 2, 1, 3])]; bool qk_125_transpose_x_0 = const()[name = string("qk_125_transpose_x_0"), val = bool(false)]; bool qk_125_transpose_y_0 = const()[name = string("qk_125_transpose_y_0"), val = bool(false)]; tensor transpose_275_perm_0 = const()[name = string("transpose_275_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_276_perm_0 = const()[name = string("transpose_276_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_276 = transpose(perm = transpose_276_perm_0, x = k_209_cast_fp16)[name = string("transpose_314")]; tensor transpose_275 = transpose(perm = transpose_275_perm_0, x = q_167_cast_fp16)[name = string("transpose_315")]; tensor qk_125_cast_fp16 = matmul(transpose_x = qk_125_transpose_x_0, transpose_y = qk_125_transpose_y_0, x = transpose_275, y = transpose_276)[name = string("qk_125_cast_fp16")]; tensor var_4524_cast_fp16 = softmax(axis = var_4368, x = qk_125_cast_fp16)[name = string("op_4524_cast_fp16")]; bool var_4526_transpose_x_0 = const()[name = string("op_4526_transpose_x_0"), val = bool(false)]; bool var_4526_transpose_y_0 = const()[name = string("op_4526_transpose_y_0"), val = bool(false)]; tensor v_209_cast_fp16 = transpose(perm = var_4520, x = var_4519_cast_fp16)[name = string("transpose_316")]; tensor var_4526_cast_fp16 = matmul(transpose_x = var_4526_transpose_x_0, transpose_y = var_4526_transpose_y_0, x = var_4524_cast_fp16, y = v_209_cast_fp16)[name = string("op_4526_cast_fp16")]; tensor var_4527 = const()[name = string("op_4527"), val = tensor([0, 2, 1, 3])]; tensor concat_463x = const()[name = string("concat_463x"), val = tensor([1, -1, 1024])]; tensor var_4528_cast_fp16 = transpose(perm = var_4527, x = var_4526_cast_fp16)[name = string("transpose_313")]; tensor x_373_cast_fp16 = reshape(shape = concat_463x, x = var_4528_cast_fp16)[name = string("x_373_cast_fp16")]; tensor var_4532_to_fp16 = const()[name = string("op_4532_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(709000640)))]; tensor var_4533_to_fp16 = const()[name = string("op_4533_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(711097856)))]; tensor linear_165_cast_fp16 = linear(bias = var_4533_to_fp16, weight = var_4532_to_fp16, x = x_373_cast_fp16)[name = string("linear_165_cast_fp16")]; tensor x_375_cast_fp16 = add(x = x_369_cast_fp16, y = linear_165_cast_fp16)[name = string("x_375_cast_fp16")]; tensor var_4540_axes_0 = const()[name = string("op_4540_axes_0"), val = tensor([-1])]; tensor blocks_20_mlp_ln_weight_to_fp16 = const()[name = string("blocks_20_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(711099968)))]; tensor blocks_20_mlp_ln_bias_to_fp16 = const()[name = string("blocks_20_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(711102080)))]; tensor var_4540_cast_fp16 = layer_norm(axes = var_4540_axes_0, beta = blocks_20_mlp_ln_bias_to_fp16, epsilon = var_4374_to_fp16, gamma = blocks_20_mlp_ln_weight_to_fp16, x = x_375_cast_fp16)[name = string("op_4540_cast_fp16")]; tensor var_4549_to_fp16 = const()[name = string("op_4549_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(711104192)))]; tensor var_4550_to_fp16 = const()[name = string("op_4550_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(719492864)))]; tensor linear_166_cast_fp16 = linear(bias = var_4550_to_fp16, weight = var_4549_to_fp16, x = var_4540_cast_fp16)[name = string("linear_166_cast_fp16")]; string x_379_mode_0 = const()[name = string("x_379_mode_0"), val = string("EXACT")]; tensor x_379_cast_fp16 = gelu(mode = x_379_mode_0, x = linear_166_cast_fp16)[name = string("x_379_cast_fp16")]; tensor var_4555_to_fp16 = const()[name = string("op_4555_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(719501120)))]; tensor var_4556_to_fp16 = const()[name = string("op_4556_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(727889792)))]; tensor linear_167_cast_fp16 = linear(bias = var_4556_to_fp16, weight = var_4555_to_fp16, x = x_379_cast_fp16)[name = string("linear_167_cast_fp16")]; tensor x_381_cast_fp16 = add(x = x_375_cast_fp16, y = linear_167_cast_fp16)[name = string("x_381_cast_fp16")]; tensor k_cache_85_begin_0 = const()[name = string("k_cache_85_begin_0"), val = tensor([21, 0, 0, 0])]; tensor k_cache_85_end_0 = const()[name = string("k_cache_85_end_0"), val = tensor([22, 1, 448, 1024])]; tensor k_cache_85_end_mask_0 = const()[name = string("k_cache_85_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_85_squeeze_mask_0 = const()[name = string("k_cache_85_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_85_cast_fp16 = slice_by_index(begin = k_cache_85_begin_0, end = k_cache_85_end_0, end_mask = k_cache_85_end_mask_0, squeeze_mask = k_cache_85_squeeze_mask_0, x = coreml_update_state_88)[name = string("k_cache_85_cast_fp16")]; tensor v_cache_85_begin_0 = const()[name = string("v_cache_85_begin_0"), val = tensor([21, 0, 0, 0])]; tensor v_cache_85_end_0 = const()[name = string("v_cache_85_end_0"), val = tensor([22, 1, 448, 1024])]; tensor v_cache_85_end_mask_0 = const()[name = string("v_cache_85_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_85_squeeze_mask_0 = const()[name = string("v_cache_85_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_85_cast_fp16 = slice_by_index(begin = v_cache_85_begin_0, end = v_cache_85_end_0, end_mask = v_cache_85_end_mask_0, squeeze_mask = v_cache_85_squeeze_mask_0, x = coreml_update_state_89)[name = string("v_cache_85_cast_fp16")]; tensor k_cache_87_begin_0 = const()[name = string("k_cache_87_begin_0"), val = tensor([21, 0, 0, 0])]; tensor k_cache_87_end_0 = const()[name = string("k_cache_87_end_0"), val = tensor([22, 1, 1500, 1024])]; tensor k_cache_87_end_mask_0 = const()[name = string("k_cache_87_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_87_squeeze_mask_0 = const()[name = string("k_cache_87_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_87_cast_fp16 = slice_by_index(begin = k_cache_87_begin_0, end = k_cache_87_end_0, end_mask = k_cache_87_end_mask_0, squeeze_mask = k_cache_87_squeeze_mask_0, x = read_state_2)[name = string("k_cache_87_cast_fp16")]; tensor v_cache_87_begin_0 = const()[name = string("v_cache_87_begin_0"), val = tensor([21, 0, 0, 0])]; tensor v_cache_87_end_0 = const()[name = string("v_cache_87_end_0"), val = tensor([22, 1, 1500, 1024])]; tensor v_cache_87_end_mask_0 = const()[name = string("v_cache_87_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_87_squeeze_mask_0 = const()[name = string("v_cache_87_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_87_cast_fp16 = slice_by_index(begin = v_cache_87_begin_0, end = v_cache_87_end_0, end_mask = v_cache_87_end_mask_0, squeeze_mask = v_cache_87_squeeze_mask_0, x = read_state_3)[name = string("v_cache_87_cast_fp16")]; int32 var_4579 = const()[name = string("op_4579"), val = int32(-1)]; tensor var_4597_axes_0 = const()[name = string("op_4597_axes_0"), val = tensor([-1])]; tensor blocks_21_attn_ln_weight_to_fp16 = const()[name = string("blocks_21_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(727891904)))]; tensor blocks_21_attn_ln_bias_to_fp16 = const()[name = string("blocks_21_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(727894016)))]; fp16 var_4585_to_fp16 = const()[name = string("op_4585_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_4597_cast_fp16 = layer_norm(axes = var_4597_axes_0, beta = blocks_21_attn_ln_bias_to_fp16, epsilon = var_4585_to_fp16, gamma = blocks_21_attn_ln_weight_to_fp16, x = x_381_cast_fp16)[name = string("op_4597_cast_fp16")]; tensor var_4608_to_fp16 = const()[name = string("op_4608_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(727896128)))]; tensor var_4609_to_fp16 = const()[name = string("op_4609_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(729993344)))]; tensor linear_168_cast_fp16 = linear(bias = var_4609_to_fp16, weight = var_4608_to_fp16, x = var_4597_cast_fp16)[name = string("linear_168_cast_fp16")]; tensor var_4612_to_fp16 = const()[name = string("op_4612_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(729995456)))]; tensor linear_169_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_4612_to_fp16, x = var_4597_cast_fp16)[name = string("linear_169_cast_fp16")]; tensor var_4616_to_fp16 = const()[name = string("op_4616_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(732092672)))]; tensor var_4617_to_fp16 = const()[name = string("op_4617_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(734189888)))]; tensor linear_170_cast_fp16 = linear(bias = var_4617_to_fp16, weight = var_4616_to_fp16, x = var_4597_cast_fp16)[name = string("linear_170_cast_fp16")]; tensor var_4619_shape_cast_fp16 = shape(x = linear_168_cast_fp16)[name = string("op_4619_shape_cast_fp16")]; int32 gather_254_axis_0 = const()[name = string("gather_254_axis_0"), val = int32(0)]; int32 gather_254_batch_dims_0 = const()[name = string("gather_254_batch_dims_0"), val = int32(0)]; bool gather_254_validate_indices_0 = const()[name = string("gather_254_validate_indices_0"), val = bool(false)]; string var_4619_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4619_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_254_to_uint16 = const()[name = string("select_254_to_uint16"), val = uint16(1)]; tensor var_4619_shape_cast_fp16_to_uint16 = cast(dtype = var_4619_shape_cast_fp16_to_uint16_dtype_0, x = var_4619_shape_cast_fp16)[name = string("cast_252")]; uint16 gather_254_cast_uint16 = gather(axis = gather_254_axis_0, batch_dims = gather_254_batch_dims_0, indices = select_254_to_uint16, validate_indices = gather_254_validate_indices_0, x = var_4619_shape_cast_fp16_to_uint16)[name = string("gather_254_cast_uint16")]; string gather_254_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_254_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_254_cast_uint16_to_int32 = cast(dtype = gather_254_cast_uint16_to_int32_dtype_0, x = gather_254_cast_uint16)[name = string("cast_251")]; int32 end_step_45 = add(x = offset, y = gather_254_cast_uint16_to_int32)[name = string("end_step_45")]; tensor expand_dims_336 = const()[name = string("expand_dims_336"), val = tensor([0])]; tensor expand_dims_338 = const()[name = string("expand_dims_338"), val = tensor([0])]; tensor expand_dims_339_axes_0 = const()[name = string("expand_dims_339_axes_0"), val = tensor([0])]; tensor expand_dims_339 = expand_dims(axes = expand_dims_339_axes_0, x = end_step_45)[name = string("expand_dims_339")]; tensor concat_466_values0_0 = const()[name = string("concat_466_values0_0"), val = tensor([21])]; int32 concat_466_axis_0 = const()[name = string("concat_466_axis_0"), val = int32(0)]; bool concat_466_interleave_0 = const()[name = string("concat_466_interleave_0"), val = bool(false)]; tensor concat_466 = concat(axis = concat_466_axis_0, interleave = concat_466_interleave_0, values = (concat_466_values0_0, expand_dims_336, expand_dims_1, expand_dims_338))[name = string("concat_466")]; tensor concat_467_values0_0 = const()[name = string("concat_467_values0_0"), val = tensor([0])]; tensor concat_467_values1_0 = const()[name = string("concat_467_values1_0"), val = tensor([0])]; tensor concat_467_values3_0 = const()[name = string("concat_467_values3_0"), val = tensor([0])]; int32 concat_467_axis_0 = const()[name = string("concat_467_axis_0"), val = int32(0)]; bool concat_467_interleave_0 = const()[name = string("concat_467_interleave_0"), val = bool(false)]; tensor concat_467 = concat(axis = concat_467_axis_0, interleave = concat_467_interleave_0, values = (concat_467_values0_0, concat_467_values1_0, expand_dims_339, concat_467_values3_0))[name = string("concat_467")]; tensor k_cache1_internal_tensor_assign_22_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_22_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_22_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_466, begin_mask = k_cache1_internal_tensor_assign_22_begin_mask_0, end = concat_467, end_mask = k_cache1_internal_tensor_assign_22_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_22_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_22_stride_0, update = linear_169_cast_fp16, x = coreml_update_state_88)[name = string("k_cache1_internal_tensor_assign_22_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_22_cast_fp16, input = k_cache1)[name = string("coreml_update_state_90_write_state")]; tensor coreml_update_state_90 = read_state(input = k_cache1)[name = string("coreml_update_state_90")]; tensor v_cache1_internal_tensor_assign_22_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_22_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_22_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_466, begin_mask = v_cache1_internal_tensor_assign_22_begin_mask_0, end = concat_467, end_mask = v_cache1_internal_tensor_assign_22_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_22_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_22_stride_0, update = linear_170_cast_fp16, x = coreml_update_state_89)[name = string("v_cache1_internal_tensor_assign_22_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_22_cast_fp16, input = v_cache1)[name = string("coreml_update_state_91_write_state")]; tensor coreml_update_state_91 = read_state(input = v_cache1)[name = string("coreml_update_state_91")]; int32 concat_472_values0_0 = const()[name = string("concat_472_values0_0"), val = int32(1)]; int32 concat_472_values2_0 = const()[name = string("concat_472_values2_0"), val = int32(1024)]; int32 concat_472_axis_0 = const()[name = string("concat_472_axis_0"), val = int32(0)]; bool concat_472_interleave_0 = const()[name = string("concat_472_interleave_0"), val = bool(false)]; tensor concat_472 = concat(axis = concat_472_axis_0, interleave = concat_472_interleave_0, values = (concat_472_values0_0, end_step_45, concat_472_values2_0))[name = string("concat_472")]; tensor var_4635_begin_0 = const()[name = string("op_4635_begin_0"), val = tensor([0, 0, 0])]; tensor var_4635_end_mask_0 = const()[name = string("op_4635_end_mask_0"), val = tensor([true, false, true])]; tensor var_4635_cast_fp16 = slice_by_index(begin = var_4635_begin_0, end = concat_472, end_mask = var_4635_end_mask_0, x = k_cache_85_cast_fp16)[name = string("op_4635_cast_fp16")]; tensor var_4638_begin_0 = const()[name = string("op_4638_begin_0"), val = tensor([0, 0, 0])]; tensor var_4638_end_mask_0 = const()[name = string("op_4638_end_mask_0"), val = tensor([true, false, true])]; tensor var_4638_cast_fp16 = slice_by_index(begin = var_4638_begin_0, end = concat_472, end_mask = var_4638_end_mask_0, x = v_cache_85_cast_fp16)[name = string("op_4638_cast_fp16")]; tensor concat_474x = const()[name = string("concat_474x"), val = tensor([1, -1, 16, 64])]; tensor var_4648_cast_fp16 = reshape(shape = concat_474x, x = linear_168_cast_fp16)[name = string("op_4648_cast_fp16")]; tensor const_204_to_fp16 = const()[name = string("const_204_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_171_cast_fp16 = mul(x = var_4648_cast_fp16, y = const_204_to_fp16)[name = string("q_171_cast_fp16")]; tensor concat_475x = const()[name = string("concat_475x"), val = tensor([1, -1, 16, 64])]; tensor var_4655_cast_fp16 = reshape(shape = concat_475x, x = var_4635_cast_fp16)[name = string("op_4655_cast_fp16")]; tensor const_205_to_fp16 = const()[name = string("const_205_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_215_cast_fp16 = mul(x = var_4655_cast_fp16, y = const_205_to_fp16)[name = string("k_215_cast_fp16")]; tensor concat_476x = const()[name = string("concat_476x"), val = tensor([1, -1, 16, 64])]; tensor var_4662_cast_fp16 = reshape(shape = concat_476x, x = var_4638_cast_fp16)[name = string("op_4662_cast_fp16")]; tensor var_4663 = const()[name = string("op_4663"), val = tensor([0, 2, 1, 3])]; bool qk_127_transpose_x_0 = const()[name = string("qk_127_transpose_x_0"), val = bool(false)]; bool qk_127_transpose_y_0 = const()[name = string("qk_127_transpose_y_0"), val = bool(false)]; tensor transpose_277_perm_0 = const()[name = string("transpose_277_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_278_perm_0 = const()[name = string("transpose_278_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_278 = transpose(perm = transpose_278_perm_0, x = k_215_cast_fp16)[name = string("transpose_310")]; tensor transpose_277 = transpose(perm = transpose_277_perm_0, x = q_171_cast_fp16)[name = string("transpose_311")]; tensor qk_127_cast_fp16 = matmul(transpose_x = qk_127_transpose_x_0, transpose_y = qk_127_transpose_y_0, x = transpose_277, y = transpose_278)[name = string("qk_127_cast_fp16")]; int32 concat_477_values1_0 = const()[name = string("concat_477_values1_0"), val = int32(448)]; int32 concat_477_axis_0 = const()[name = string("concat_477_axis_0"), val = int32(0)]; bool concat_477_interleave_0 = const()[name = string("concat_477_interleave_0"), val = bool(false)]; tensor concat_477 = concat(axis = concat_477_axis_0, interleave = concat_477_interleave_0, values = (gather_254_cast_uint16_to_int32, concat_477_values1_0))[name = string("concat_477")]; tensor var_4666_begin_0 = const()[name = string("op_4666_begin_0"), val = tensor([0, 0])]; tensor var_4666_end_mask_0 = const()[name = string("op_4666_end_mask_0"), val = tensor([false, true])]; tensor var_4666_cast_fp16 = slice_by_index(begin = var_4666_begin_0, end = concat_477, end_mask = var_4666_end_mask_0, x = mask_to_fp16)[name = string("op_4666_cast_fp16")]; int32 concat_478_values0_0 = const()[name = string("concat_478_values0_0"), val = int32(0)]; int32 concat_478_axis_0 = const()[name = string("concat_478_axis_0"), val = int32(0)]; bool concat_478_interleave_0 = const()[name = string("concat_478_interleave_0"), val = bool(false)]; tensor concat_478 = concat(axis = concat_478_axis_0, interleave = concat_478_interleave_0, values = (concat_478_values0_0, gather_254_cast_uint16_to_int32))[name = string("concat_478")]; tensor var_4667_begin_0 = const()[name = string("op_4667_begin_0"), val = tensor([0, 0])]; tensor var_4667_end_mask_0 = const()[name = string("op_4667_end_mask_0"), val = tensor([true, false])]; tensor var_4667_cast_fp16 = slice_by_index(begin = var_4667_begin_0, end = concat_478, end_mask = var_4667_end_mask_0, x = var_4666_cast_fp16)[name = string("op_4667_cast_fp16")]; tensor qk_129_cast_fp16 = add(x = qk_127_cast_fp16, y = var_4667_cast_fp16)[name = string("qk_129_cast_fp16")]; tensor var_4670_cast_fp16 = softmax(axis = var_4579, x = qk_129_cast_fp16)[name = string("op_4670_cast_fp16")]; bool var_4672_transpose_x_0 = const()[name = string("op_4672_transpose_x_0"), val = bool(false)]; bool var_4672_transpose_y_0 = const()[name = string("op_4672_transpose_y_0"), val = bool(false)]; tensor v_215_cast_fp16 = transpose(perm = var_4663, x = var_4662_cast_fp16)[name = string("transpose_312")]; tensor var_4672_cast_fp16 = matmul(transpose_x = var_4672_transpose_x_0, transpose_y = var_4672_transpose_y_0, x = var_4670_cast_fp16, y = v_215_cast_fp16)[name = string("op_4672_cast_fp16")]; tensor var_4673 = const()[name = string("op_4673"), val = tensor([0, 2, 1, 3])]; tensor concat_479x = const()[name = string("concat_479x"), val = tensor([1, -1, 1024])]; tensor var_4674_cast_fp16 = transpose(perm = var_4673, x = var_4672_cast_fp16)[name = string("transpose_309")]; tensor x_385_cast_fp16 = reshape(shape = concat_479x, x = var_4674_cast_fp16)[name = string("x_385_cast_fp16")]; tensor var_4678_to_fp16 = const()[name = string("op_4678_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(734192000)))]; tensor var_4679_to_fp16 = const()[name = string("op_4679_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736289216)))]; tensor linear_171_cast_fp16 = linear(bias = var_4679_to_fp16, weight = var_4678_to_fp16, x = x_385_cast_fp16)[name = string("linear_171_cast_fp16")]; tensor x_387_cast_fp16 = add(x = x_381_cast_fp16, y = linear_171_cast_fp16)[name = string("x_387_cast_fp16")]; tensor var_4686_axes_0 = const()[name = string("op_4686_axes_0"), val = tensor([-1])]; tensor blocks_21_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_21_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736291328)))]; tensor blocks_21_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_21_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736293440)))]; tensor var_4686_cast_fp16 = layer_norm(axes = var_4686_axes_0, beta = blocks_21_cross_attn_ln_bias_to_fp16, epsilon = var_4585_to_fp16, gamma = blocks_21_cross_attn_ln_weight_to_fp16, x = x_387_cast_fp16)[name = string("op_4686_cast_fp16")]; tensor var_4695_to_fp16 = const()[name = string("op_4695_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736295552)))]; tensor var_4696_to_fp16 = const()[name = string("op_4696_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(738392768)))]; tensor linear_172_cast_fp16 = linear(bias = var_4696_to_fp16, weight = var_4695_to_fp16, x = var_4686_cast_fp16)[name = string("linear_172_cast_fp16")]; tensor concat_480 = const()[name = string("concat_480"), val = tensor([0, 0, 0])]; tensor concat_481 = const()[name = string("concat_481"), val = tensor([0, 1500, 0])]; tensor k_217_internal_tensor_assign_1_stride_0 = const()[name = string("k_217_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_217_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_217_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_217_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_217_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_217_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_217_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_217_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_480, begin_mask = k_217_internal_tensor_assign_1_begin_mask_0, end = concat_481, end_mask = k_217_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_217_internal_tensor_assign_1_squeeze_mask_0, stride = k_217_internal_tensor_assign_1_stride_0, update = k_cache_87_cast_fp16, x = k_7_to_fp16)[name = string("k_217_internal_tensor_assign_1_cast_fp16")]; tensor concat_482 = const()[name = string("concat_482"), val = tensor([0, 0, 0])]; tensor concat_483 = const()[name = string("concat_483"), val = tensor([0, 1500, 0])]; tensor v_217_internal_tensor_assign_1_stride_0 = const()[name = string("v_217_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_217_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_217_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_217_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_217_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_217_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_217_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_217_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_482, begin_mask = v_217_internal_tensor_assign_1_begin_mask_0, end = concat_483, end_mask = v_217_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_217_internal_tensor_assign_1_squeeze_mask_0, stride = v_217_internal_tensor_assign_1_stride_0, update = v_cache_87_cast_fp16, x = k_7_to_fp16)[name = string("v_217_internal_tensor_assign_1_cast_fp16")]; tensor concat_484x = const()[name = string("concat_484x"), val = tensor([1, -1, 16, 64])]; tensor var_4716_cast_fp16 = reshape(shape = concat_484x, x = linear_172_cast_fp16)[name = string("op_4716_cast_fp16")]; tensor const_206_to_fp16 = const()[name = string("const_206_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_175_cast_fp16 = mul(x = var_4716_cast_fp16, y = const_206_to_fp16)[name = string("q_175_cast_fp16")]; tensor var_4722 = const()[name = string("op_4722"), val = tensor([1, 1500, 16, -1])]; tensor var_4723_cast_fp16 = reshape(shape = var_4722, x = k_217_internal_tensor_assign_1_cast_fp16)[name = string("op_4723_cast_fp16")]; tensor const_207_to_fp16 = const()[name = string("const_207_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_219_cast_fp16 = mul(x = var_4723_cast_fp16, y = const_207_to_fp16)[name = string("k_219_cast_fp16")]; tensor var_4729 = const()[name = string("op_4729"), val = tensor([1, 1500, 16, -1])]; tensor var_4730_cast_fp16 = reshape(shape = var_4729, x = v_217_internal_tensor_assign_1_cast_fp16)[name = string("op_4730_cast_fp16")]; tensor var_4731 = const()[name = string("op_4731"), val = tensor([0, 2, 1, 3])]; bool qk_131_transpose_x_0 = const()[name = string("qk_131_transpose_x_0"), val = bool(false)]; bool qk_131_transpose_y_0 = const()[name = string("qk_131_transpose_y_0"), val = bool(false)]; tensor transpose_279_perm_0 = const()[name = string("transpose_279_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_280_perm_0 = const()[name = string("transpose_280_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_280 = transpose(perm = transpose_280_perm_0, x = k_219_cast_fp16)[name = string("transpose_306")]; tensor transpose_279 = transpose(perm = transpose_279_perm_0, x = q_175_cast_fp16)[name = string("transpose_307")]; tensor qk_131_cast_fp16 = matmul(transpose_x = qk_131_transpose_x_0, transpose_y = qk_131_transpose_y_0, x = transpose_279, y = transpose_280)[name = string("qk_131_cast_fp16")]; tensor var_4735_cast_fp16 = softmax(axis = var_4579, x = qk_131_cast_fp16)[name = string("op_4735_cast_fp16")]; bool var_4737_transpose_x_0 = const()[name = string("op_4737_transpose_x_0"), val = bool(false)]; bool var_4737_transpose_y_0 = const()[name = string("op_4737_transpose_y_0"), val = bool(false)]; tensor v_219_cast_fp16 = transpose(perm = var_4731, x = var_4730_cast_fp16)[name = string("transpose_308")]; tensor var_4737_cast_fp16 = matmul(transpose_x = var_4737_transpose_x_0, transpose_y = var_4737_transpose_y_0, x = var_4735_cast_fp16, y = v_219_cast_fp16)[name = string("op_4737_cast_fp16")]; tensor var_4738 = const()[name = string("op_4738"), val = tensor([0, 2, 1, 3])]; tensor concat_485x = const()[name = string("concat_485x"), val = tensor([1, -1, 1024])]; tensor var_4739_cast_fp16 = transpose(perm = var_4738, x = var_4737_cast_fp16)[name = string("transpose_305")]; tensor x_391_cast_fp16 = reshape(shape = concat_485x, x = var_4739_cast_fp16)[name = string("x_391_cast_fp16")]; tensor var_4743_to_fp16 = const()[name = string("op_4743_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(738394880)))]; tensor var_4744_to_fp16 = const()[name = string("op_4744_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(740492096)))]; tensor linear_173_cast_fp16 = linear(bias = var_4744_to_fp16, weight = var_4743_to_fp16, x = x_391_cast_fp16)[name = string("linear_173_cast_fp16")]; tensor x_393_cast_fp16 = add(x = x_387_cast_fp16, y = linear_173_cast_fp16)[name = string("x_393_cast_fp16")]; tensor var_4751_axes_0 = const()[name = string("op_4751_axes_0"), val = tensor([-1])]; tensor blocks_21_mlp_ln_weight_to_fp16 = const()[name = string("blocks_21_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(740494208)))]; tensor blocks_21_mlp_ln_bias_to_fp16 = const()[name = string("blocks_21_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(740496320)))]; tensor var_4751_cast_fp16 = layer_norm(axes = var_4751_axes_0, beta = blocks_21_mlp_ln_bias_to_fp16, epsilon = var_4585_to_fp16, gamma = blocks_21_mlp_ln_weight_to_fp16, x = x_393_cast_fp16)[name = string("op_4751_cast_fp16")]; tensor var_4760_to_fp16 = const()[name = string("op_4760_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(740498432)))]; tensor var_4761_to_fp16 = const()[name = string("op_4761_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(748887104)))]; tensor linear_174_cast_fp16 = linear(bias = var_4761_to_fp16, weight = var_4760_to_fp16, x = var_4751_cast_fp16)[name = string("linear_174_cast_fp16")]; string x_397_mode_0 = const()[name = string("x_397_mode_0"), val = string("EXACT")]; tensor x_397_cast_fp16 = gelu(mode = x_397_mode_0, x = linear_174_cast_fp16)[name = string("x_397_cast_fp16")]; tensor var_4766_to_fp16 = const()[name = string("op_4766_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(748895360)))]; tensor var_4767_to_fp16 = const()[name = string("op_4767_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(757284032)))]; tensor linear_175_cast_fp16 = linear(bias = var_4767_to_fp16, weight = var_4766_to_fp16, x = x_397_cast_fp16)[name = string("linear_175_cast_fp16")]; tensor x_399_cast_fp16 = add(x = x_393_cast_fp16, y = linear_175_cast_fp16)[name = string("x_399_cast_fp16")]; tensor k_cache_89_begin_0 = const()[name = string("k_cache_89_begin_0"), val = tensor([22, 0, 0, 0])]; tensor k_cache_89_end_0 = const()[name = string("k_cache_89_end_0"), val = tensor([23, 1, 448, 1024])]; tensor k_cache_89_end_mask_0 = const()[name = string("k_cache_89_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_89_squeeze_mask_0 = const()[name = string("k_cache_89_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_89_cast_fp16 = slice_by_index(begin = k_cache_89_begin_0, end = k_cache_89_end_0, end_mask = k_cache_89_end_mask_0, squeeze_mask = k_cache_89_squeeze_mask_0, x = coreml_update_state_90)[name = string("k_cache_89_cast_fp16")]; tensor v_cache_89_begin_0 = const()[name = string("v_cache_89_begin_0"), val = tensor([22, 0, 0, 0])]; tensor v_cache_89_end_0 = const()[name = string("v_cache_89_end_0"), val = tensor([23, 1, 448, 1024])]; tensor v_cache_89_end_mask_0 = const()[name = string("v_cache_89_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_89_squeeze_mask_0 = const()[name = string("v_cache_89_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_89_cast_fp16 = slice_by_index(begin = v_cache_89_begin_0, end = v_cache_89_end_0, end_mask = v_cache_89_end_mask_0, squeeze_mask = v_cache_89_squeeze_mask_0, x = coreml_update_state_91)[name = string("v_cache_89_cast_fp16")]; tensor k_cache_91_begin_0 = const()[name = string("k_cache_91_begin_0"), val = tensor([22, 0, 0, 0])]; tensor k_cache_91_end_0 = const()[name = string("k_cache_91_end_0"), val = tensor([23, 1, 1500, 1024])]; tensor k_cache_91_end_mask_0 = const()[name = string("k_cache_91_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_91_squeeze_mask_0 = const()[name = string("k_cache_91_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_91_cast_fp16 = slice_by_index(begin = k_cache_91_begin_0, end = k_cache_91_end_0, end_mask = k_cache_91_end_mask_0, squeeze_mask = k_cache_91_squeeze_mask_0, x = read_state_2)[name = string("k_cache_91_cast_fp16")]; tensor v_cache_91_begin_0 = const()[name = string("v_cache_91_begin_0"), val = tensor([22, 0, 0, 0])]; tensor v_cache_91_end_0 = const()[name = string("v_cache_91_end_0"), val = tensor([23, 1, 1500, 1024])]; tensor v_cache_91_end_mask_0 = const()[name = string("v_cache_91_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_91_squeeze_mask_0 = const()[name = string("v_cache_91_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_91_cast_fp16 = slice_by_index(begin = v_cache_91_begin_0, end = v_cache_91_end_0, end_mask = v_cache_91_end_mask_0, squeeze_mask = v_cache_91_squeeze_mask_0, x = read_state_3)[name = string("v_cache_91_cast_fp16")]; int32 var_4790 = const()[name = string("op_4790"), val = int32(-1)]; tensor var_4808_axes_0 = const()[name = string("op_4808_axes_0"), val = tensor([-1])]; tensor blocks_22_attn_ln_weight_to_fp16 = const()[name = string("blocks_22_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(757286144)))]; tensor blocks_22_attn_ln_bias_to_fp16 = const()[name = string("blocks_22_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(757288256)))]; fp16 var_4796_to_fp16 = const()[name = string("op_4796_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_4808_cast_fp16 = layer_norm(axes = var_4808_axes_0, beta = blocks_22_attn_ln_bias_to_fp16, epsilon = var_4796_to_fp16, gamma = blocks_22_attn_ln_weight_to_fp16, x = x_399_cast_fp16)[name = string("op_4808_cast_fp16")]; tensor var_4819_to_fp16 = const()[name = string("op_4819_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(757290368)))]; tensor var_4820_to_fp16 = const()[name = string("op_4820_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(759387584)))]; tensor linear_176_cast_fp16 = linear(bias = var_4820_to_fp16, weight = var_4819_to_fp16, x = var_4808_cast_fp16)[name = string("linear_176_cast_fp16")]; tensor var_4823_to_fp16 = const()[name = string("op_4823_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(759389696)))]; tensor linear_177_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_4823_to_fp16, x = var_4808_cast_fp16)[name = string("linear_177_cast_fp16")]; tensor var_4827_to_fp16 = const()[name = string("op_4827_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(761486912)))]; tensor var_4828_to_fp16 = const()[name = string("op_4828_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(763584128)))]; tensor linear_178_cast_fp16 = linear(bias = var_4828_to_fp16, weight = var_4827_to_fp16, x = var_4808_cast_fp16)[name = string("linear_178_cast_fp16")]; tensor var_4830_shape_cast_fp16 = shape(x = linear_176_cast_fp16)[name = string("op_4830_shape_cast_fp16")]; int32 gather_266_axis_0 = const()[name = string("gather_266_axis_0"), val = int32(0)]; int32 gather_266_batch_dims_0 = const()[name = string("gather_266_batch_dims_0"), val = int32(0)]; bool gather_266_validate_indices_0 = const()[name = string("gather_266_validate_indices_0"), val = bool(false)]; string var_4830_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4830_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_266_to_uint16 = const()[name = string("select_266_to_uint16"), val = uint16(1)]; tensor var_4830_shape_cast_fp16_to_uint16 = cast(dtype = var_4830_shape_cast_fp16_to_uint16_dtype_0, x = var_4830_shape_cast_fp16)[name = string("cast_250")]; uint16 gather_266_cast_uint16 = gather(axis = gather_266_axis_0, batch_dims = gather_266_batch_dims_0, indices = select_266_to_uint16, validate_indices = gather_266_validate_indices_0, x = var_4830_shape_cast_fp16_to_uint16)[name = string("gather_266_cast_uint16")]; string gather_266_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_266_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_266_cast_uint16_to_int32 = cast(dtype = gather_266_cast_uint16_to_int32_dtype_0, x = gather_266_cast_uint16)[name = string("cast_249")]; int32 end_step_47 = add(x = offset, y = gather_266_cast_uint16_to_int32)[name = string("end_step_47")]; tensor expand_dims_352 = const()[name = string("expand_dims_352"), val = tensor([0])]; tensor expand_dims_354 = const()[name = string("expand_dims_354"), val = tensor([0])]; tensor expand_dims_355_axes_0 = const()[name = string("expand_dims_355_axes_0"), val = tensor([0])]; tensor expand_dims_355 = expand_dims(axes = expand_dims_355_axes_0, x = end_step_47)[name = string("expand_dims_355")]; tensor concat_488_values0_0 = const()[name = string("concat_488_values0_0"), val = tensor([22])]; int32 concat_488_axis_0 = const()[name = string("concat_488_axis_0"), val = int32(0)]; bool concat_488_interleave_0 = const()[name = string("concat_488_interleave_0"), val = bool(false)]; tensor concat_488 = concat(axis = concat_488_axis_0, interleave = concat_488_interleave_0, values = (concat_488_values0_0, expand_dims_352, expand_dims_1, expand_dims_354))[name = string("concat_488")]; tensor concat_489_values0_0 = const()[name = string("concat_489_values0_0"), val = tensor([0])]; tensor concat_489_values1_0 = const()[name = string("concat_489_values1_0"), val = tensor([0])]; tensor concat_489_values3_0 = const()[name = string("concat_489_values3_0"), val = tensor([0])]; int32 concat_489_axis_0 = const()[name = string("concat_489_axis_0"), val = int32(0)]; bool concat_489_interleave_0 = const()[name = string("concat_489_interleave_0"), val = bool(false)]; tensor concat_489 = concat(axis = concat_489_axis_0, interleave = concat_489_interleave_0, values = (concat_489_values0_0, concat_489_values1_0, expand_dims_355, concat_489_values3_0))[name = string("concat_489")]; tensor k_cache1_internal_tensor_assign_23_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_23_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_23_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_488, begin_mask = k_cache1_internal_tensor_assign_23_begin_mask_0, end = concat_489, end_mask = k_cache1_internal_tensor_assign_23_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_23_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_23_stride_0, update = linear_177_cast_fp16, x = coreml_update_state_90)[name = string("k_cache1_internal_tensor_assign_23_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_23_cast_fp16, input = k_cache1)[name = string("coreml_update_state_92_write_state")]; tensor coreml_update_state_92 = read_state(input = k_cache1)[name = string("coreml_update_state_92")]; tensor v_cache1_internal_tensor_assign_23_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_23_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_23_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_488, begin_mask = v_cache1_internal_tensor_assign_23_begin_mask_0, end = concat_489, end_mask = v_cache1_internal_tensor_assign_23_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_23_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_23_stride_0, update = linear_178_cast_fp16, x = coreml_update_state_91)[name = string("v_cache1_internal_tensor_assign_23_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_23_cast_fp16, input = v_cache1)[name = string("coreml_update_state_93_write_state")]; tensor coreml_update_state_93 = read_state(input = v_cache1)[name = string("coreml_update_state_93")]; int32 concat_494_values0_0 = const()[name = string("concat_494_values0_0"), val = int32(1)]; int32 concat_494_values2_0 = const()[name = string("concat_494_values2_0"), val = int32(1024)]; int32 concat_494_axis_0 = const()[name = string("concat_494_axis_0"), val = int32(0)]; bool concat_494_interleave_0 = const()[name = string("concat_494_interleave_0"), val = bool(false)]; tensor concat_494 = concat(axis = concat_494_axis_0, interleave = concat_494_interleave_0, values = (concat_494_values0_0, end_step_47, concat_494_values2_0))[name = string("concat_494")]; tensor var_4846_begin_0 = const()[name = string("op_4846_begin_0"), val = tensor([0, 0, 0])]; tensor var_4846_end_mask_0 = const()[name = string("op_4846_end_mask_0"), val = tensor([true, false, true])]; tensor var_4846_cast_fp16 = slice_by_index(begin = var_4846_begin_0, end = concat_494, end_mask = var_4846_end_mask_0, x = k_cache_89_cast_fp16)[name = string("op_4846_cast_fp16")]; tensor var_4849_begin_0 = const()[name = string("op_4849_begin_0"), val = tensor([0, 0, 0])]; tensor var_4849_end_mask_0 = const()[name = string("op_4849_end_mask_0"), val = tensor([true, false, true])]; tensor var_4849_cast_fp16 = slice_by_index(begin = var_4849_begin_0, end = concat_494, end_mask = var_4849_end_mask_0, x = v_cache_89_cast_fp16)[name = string("op_4849_cast_fp16")]; tensor concat_496x = const()[name = string("concat_496x"), val = tensor([1, -1, 16, 64])]; tensor var_4859_cast_fp16 = reshape(shape = concat_496x, x = linear_176_cast_fp16)[name = string("op_4859_cast_fp16")]; tensor const_208_to_fp16 = const()[name = string("const_208_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_179_cast_fp16 = mul(x = var_4859_cast_fp16, y = const_208_to_fp16)[name = string("q_179_cast_fp16")]; tensor concat_497x = const()[name = string("concat_497x"), val = tensor([1, -1, 16, 64])]; tensor var_4866_cast_fp16 = reshape(shape = concat_497x, x = var_4846_cast_fp16)[name = string("op_4866_cast_fp16")]; tensor const_209_to_fp16 = const()[name = string("const_209_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_225_cast_fp16 = mul(x = var_4866_cast_fp16, y = const_209_to_fp16)[name = string("k_225_cast_fp16")]; tensor concat_498x = const()[name = string("concat_498x"), val = tensor([1, -1, 16, 64])]; tensor var_4873_cast_fp16 = reshape(shape = concat_498x, x = var_4849_cast_fp16)[name = string("op_4873_cast_fp16")]; tensor var_4874 = const()[name = string("op_4874"), val = tensor([0, 2, 1, 3])]; bool qk_133_transpose_x_0 = const()[name = string("qk_133_transpose_x_0"), val = bool(false)]; bool qk_133_transpose_y_0 = const()[name = string("qk_133_transpose_y_0"), val = bool(false)]; tensor transpose_281_perm_0 = const()[name = string("transpose_281_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_282_perm_0 = const()[name = string("transpose_282_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_282 = transpose(perm = transpose_282_perm_0, x = k_225_cast_fp16)[name = string("transpose_302")]; tensor transpose_281 = transpose(perm = transpose_281_perm_0, x = q_179_cast_fp16)[name = string("transpose_303")]; tensor qk_133_cast_fp16 = matmul(transpose_x = qk_133_transpose_x_0, transpose_y = qk_133_transpose_y_0, x = transpose_281, y = transpose_282)[name = string("qk_133_cast_fp16")]; int32 concat_499_values1_0 = const()[name = string("concat_499_values1_0"), val = int32(448)]; int32 concat_499_axis_0 = const()[name = string("concat_499_axis_0"), val = int32(0)]; bool concat_499_interleave_0 = const()[name = string("concat_499_interleave_0"), val = bool(false)]; tensor concat_499 = concat(axis = concat_499_axis_0, interleave = concat_499_interleave_0, values = (gather_266_cast_uint16_to_int32, concat_499_values1_0))[name = string("concat_499")]; tensor var_4877_begin_0 = const()[name = string("op_4877_begin_0"), val = tensor([0, 0])]; tensor var_4877_end_mask_0 = const()[name = string("op_4877_end_mask_0"), val = tensor([false, true])]; tensor var_4877_cast_fp16 = slice_by_index(begin = var_4877_begin_0, end = concat_499, end_mask = var_4877_end_mask_0, x = mask_to_fp16)[name = string("op_4877_cast_fp16")]; int32 concat_500_values0_0 = const()[name = string("concat_500_values0_0"), val = int32(0)]; int32 concat_500_axis_0 = const()[name = string("concat_500_axis_0"), val = int32(0)]; bool concat_500_interleave_0 = const()[name = string("concat_500_interleave_0"), val = bool(false)]; tensor concat_500 = concat(axis = concat_500_axis_0, interleave = concat_500_interleave_0, values = (concat_500_values0_0, gather_266_cast_uint16_to_int32))[name = string("concat_500")]; tensor var_4878_begin_0 = const()[name = string("op_4878_begin_0"), val = tensor([0, 0])]; tensor var_4878_end_mask_0 = const()[name = string("op_4878_end_mask_0"), val = tensor([true, false])]; tensor var_4878_cast_fp16 = slice_by_index(begin = var_4878_begin_0, end = concat_500, end_mask = var_4878_end_mask_0, x = var_4877_cast_fp16)[name = string("op_4878_cast_fp16")]; tensor qk_135_cast_fp16 = add(x = qk_133_cast_fp16, y = var_4878_cast_fp16)[name = string("qk_135_cast_fp16")]; tensor var_4881_cast_fp16 = softmax(axis = var_4790, x = qk_135_cast_fp16)[name = string("op_4881_cast_fp16")]; bool var_4883_transpose_x_0 = const()[name = string("op_4883_transpose_x_0"), val = bool(false)]; bool var_4883_transpose_y_0 = const()[name = string("op_4883_transpose_y_0"), val = bool(false)]; tensor v_225_cast_fp16 = transpose(perm = var_4874, x = var_4873_cast_fp16)[name = string("transpose_304")]; tensor var_4883_cast_fp16 = matmul(transpose_x = var_4883_transpose_x_0, transpose_y = var_4883_transpose_y_0, x = var_4881_cast_fp16, y = v_225_cast_fp16)[name = string("op_4883_cast_fp16")]; tensor var_4884 = const()[name = string("op_4884"), val = tensor([0, 2, 1, 3])]; tensor concat_501x = const()[name = string("concat_501x"), val = tensor([1, -1, 1024])]; tensor var_4885_cast_fp16 = transpose(perm = var_4884, x = var_4883_cast_fp16)[name = string("transpose_301")]; tensor x_403_cast_fp16 = reshape(shape = concat_501x, x = var_4885_cast_fp16)[name = string("x_403_cast_fp16")]; tensor var_4889_to_fp16 = const()[name = string("op_4889_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(763586240)))]; tensor var_4890_to_fp16 = const()[name = string("op_4890_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(765683456)))]; tensor linear_179_cast_fp16 = linear(bias = var_4890_to_fp16, weight = var_4889_to_fp16, x = x_403_cast_fp16)[name = string("linear_179_cast_fp16")]; tensor x_405_cast_fp16 = add(x = x_399_cast_fp16, y = linear_179_cast_fp16)[name = string("x_405_cast_fp16")]; tensor var_4897_axes_0 = const()[name = string("op_4897_axes_0"), val = tensor([-1])]; tensor blocks_22_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_22_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(765685568)))]; tensor blocks_22_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_22_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(765687680)))]; tensor var_4897_cast_fp16 = layer_norm(axes = var_4897_axes_0, beta = blocks_22_cross_attn_ln_bias_to_fp16, epsilon = var_4796_to_fp16, gamma = blocks_22_cross_attn_ln_weight_to_fp16, x = x_405_cast_fp16)[name = string("op_4897_cast_fp16")]; tensor var_4906_to_fp16 = const()[name = string("op_4906_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(765689792)))]; tensor var_4907_to_fp16 = const()[name = string("op_4907_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(767787008)))]; tensor linear_180_cast_fp16 = linear(bias = var_4907_to_fp16, weight = var_4906_to_fp16, x = var_4897_cast_fp16)[name = string("linear_180_cast_fp16")]; tensor concat_502 = const()[name = string("concat_502"), val = tensor([0, 0, 0])]; tensor concat_503 = const()[name = string("concat_503"), val = tensor([0, 1500, 0])]; tensor k_227_internal_tensor_assign_1_stride_0 = const()[name = string("k_227_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_227_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_227_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_227_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_227_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_227_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_227_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_227_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_502, begin_mask = k_227_internal_tensor_assign_1_begin_mask_0, end = concat_503, end_mask = k_227_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_227_internal_tensor_assign_1_squeeze_mask_0, stride = k_227_internal_tensor_assign_1_stride_0, update = k_cache_91_cast_fp16, x = k_7_to_fp16)[name = string("k_227_internal_tensor_assign_1_cast_fp16")]; tensor concat_504 = const()[name = string("concat_504"), val = tensor([0, 0, 0])]; tensor concat_505 = const()[name = string("concat_505"), val = tensor([0, 1500, 0])]; tensor v_227_internal_tensor_assign_1_stride_0 = const()[name = string("v_227_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_227_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_227_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_227_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_227_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_227_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_227_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_227_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_504, begin_mask = v_227_internal_tensor_assign_1_begin_mask_0, end = concat_505, end_mask = v_227_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_227_internal_tensor_assign_1_squeeze_mask_0, stride = v_227_internal_tensor_assign_1_stride_0, update = v_cache_91_cast_fp16, x = k_7_to_fp16)[name = string("v_227_internal_tensor_assign_1_cast_fp16")]; tensor concat_506x = const()[name = string("concat_506x"), val = tensor([1, -1, 16, 64])]; tensor var_4927_cast_fp16 = reshape(shape = concat_506x, x = linear_180_cast_fp16)[name = string("op_4927_cast_fp16")]; tensor const_210_to_fp16 = const()[name = string("const_210_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_183_cast_fp16 = mul(x = var_4927_cast_fp16, y = const_210_to_fp16)[name = string("q_183_cast_fp16")]; tensor var_4933 = const()[name = string("op_4933"), val = tensor([1, 1500, 16, -1])]; tensor var_4934_cast_fp16 = reshape(shape = var_4933, x = k_227_internal_tensor_assign_1_cast_fp16)[name = string("op_4934_cast_fp16")]; tensor const_211_to_fp16 = const()[name = string("const_211_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_229_cast_fp16 = mul(x = var_4934_cast_fp16, y = const_211_to_fp16)[name = string("k_229_cast_fp16")]; tensor var_4940 = const()[name = string("op_4940"), val = tensor([1, 1500, 16, -1])]; tensor var_4941_cast_fp16 = reshape(shape = var_4940, x = v_227_internal_tensor_assign_1_cast_fp16)[name = string("op_4941_cast_fp16")]; tensor var_4942 = const()[name = string("op_4942"), val = tensor([0, 2, 1, 3])]; bool qk_137_transpose_x_0 = const()[name = string("qk_137_transpose_x_0"), val = bool(false)]; bool qk_137_transpose_y_0 = const()[name = string("qk_137_transpose_y_0"), val = bool(false)]; tensor transpose_283_perm_0 = const()[name = string("transpose_283_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_284_perm_0 = const()[name = string("transpose_284_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_284 = transpose(perm = transpose_284_perm_0, x = k_229_cast_fp16)[name = string("transpose_298")]; tensor transpose_283 = transpose(perm = transpose_283_perm_0, x = q_183_cast_fp16)[name = string("transpose_299")]; tensor qk_137_cast_fp16 = matmul(transpose_x = qk_137_transpose_x_0, transpose_y = qk_137_transpose_y_0, x = transpose_283, y = transpose_284)[name = string("qk_137_cast_fp16")]; tensor var_4946_cast_fp16 = softmax(axis = var_4790, x = qk_137_cast_fp16)[name = string("op_4946_cast_fp16")]; bool var_4948_transpose_x_0 = const()[name = string("op_4948_transpose_x_0"), val = bool(false)]; bool var_4948_transpose_y_0 = const()[name = string("op_4948_transpose_y_0"), val = bool(false)]; tensor v_229_cast_fp16 = transpose(perm = var_4942, x = var_4941_cast_fp16)[name = string("transpose_300")]; tensor var_4948_cast_fp16 = matmul(transpose_x = var_4948_transpose_x_0, transpose_y = var_4948_transpose_y_0, x = var_4946_cast_fp16, y = v_229_cast_fp16)[name = string("op_4948_cast_fp16")]; tensor var_4949 = const()[name = string("op_4949"), val = tensor([0, 2, 1, 3])]; tensor concat_507x = const()[name = string("concat_507x"), val = tensor([1, -1, 1024])]; tensor var_4950_cast_fp16 = transpose(perm = var_4949, x = var_4948_cast_fp16)[name = string("transpose_297")]; tensor x_409_cast_fp16 = reshape(shape = concat_507x, x = var_4950_cast_fp16)[name = string("x_409_cast_fp16")]; tensor var_4954_to_fp16 = const()[name = string("op_4954_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(767789120)))]; tensor var_4955_to_fp16 = const()[name = string("op_4955_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(769886336)))]; tensor linear_181_cast_fp16 = linear(bias = var_4955_to_fp16, weight = var_4954_to_fp16, x = x_409_cast_fp16)[name = string("linear_181_cast_fp16")]; tensor x_411_cast_fp16 = add(x = x_405_cast_fp16, y = linear_181_cast_fp16)[name = string("x_411_cast_fp16")]; tensor var_4962_axes_0 = const()[name = string("op_4962_axes_0"), val = tensor([-1])]; tensor blocks_22_mlp_ln_weight_to_fp16 = const()[name = string("blocks_22_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(769888448)))]; tensor blocks_22_mlp_ln_bias_to_fp16 = const()[name = string("blocks_22_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(769890560)))]; tensor var_4962_cast_fp16 = layer_norm(axes = var_4962_axes_0, beta = blocks_22_mlp_ln_bias_to_fp16, epsilon = var_4796_to_fp16, gamma = blocks_22_mlp_ln_weight_to_fp16, x = x_411_cast_fp16)[name = string("op_4962_cast_fp16")]; tensor var_4971_to_fp16 = const()[name = string("op_4971_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(769892672)))]; tensor var_4972_to_fp16 = const()[name = string("op_4972_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(778281344)))]; tensor linear_182_cast_fp16 = linear(bias = var_4972_to_fp16, weight = var_4971_to_fp16, x = var_4962_cast_fp16)[name = string("linear_182_cast_fp16")]; string x_415_mode_0 = const()[name = string("x_415_mode_0"), val = string("EXACT")]; tensor x_415_cast_fp16 = gelu(mode = x_415_mode_0, x = linear_182_cast_fp16)[name = string("x_415_cast_fp16")]; tensor var_4977_to_fp16 = const()[name = string("op_4977_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(778289600)))]; tensor var_4978_to_fp16 = const()[name = string("op_4978_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(786678272)))]; tensor linear_183_cast_fp16 = linear(bias = var_4978_to_fp16, weight = var_4977_to_fp16, x = x_415_cast_fp16)[name = string("linear_183_cast_fp16")]; tensor x_417_cast_fp16 = add(x = x_411_cast_fp16, y = linear_183_cast_fp16)[name = string("x_417_cast_fp16")]; tensor k_cache_93_begin_0 = const()[name = string("k_cache_93_begin_0"), val = tensor([23, 0, 0, 0])]; tensor k_cache_93_end_0 = const()[name = string("k_cache_93_end_0"), val = tensor([24, 1, 448, 1024])]; tensor k_cache_93_end_mask_0 = const()[name = string("k_cache_93_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_93_squeeze_mask_0 = const()[name = string("k_cache_93_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_93_cast_fp16 = slice_by_index(begin = k_cache_93_begin_0, end = k_cache_93_end_0, end_mask = k_cache_93_end_mask_0, squeeze_mask = k_cache_93_squeeze_mask_0, x = coreml_update_state_92)[name = string("k_cache_93_cast_fp16")]; tensor v_cache_93_begin_0 = const()[name = string("v_cache_93_begin_0"), val = tensor([23, 0, 0, 0])]; tensor v_cache_93_end_0 = const()[name = string("v_cache_93_end_0"), val = tensor([24, 1, 448, 1024])]; tensor v_cache_93_end_mask_0 = const()[name = string("v_cache_93_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_93_squeeze_mask_0 = const()[name = string("v_cache_93_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_93_cast_fp16 = slice_by_index(begin = v_cache_93_begin_0, end = v_cache_93_end_0, end_mask = v_cache_93_end_mask_0, squeeze_mask = v_cache_93_squeeze_mask_0, x = coreml_update_state_93)[name = string("v_cache_93_cast_fp16")]; tensor k_cache_begin_0 = const()[name = string("k_cache_begin_0"), val = tensor([23, 0, 0, 0])]; tensor k_cache_end_0 = const()[name = string("k_cache_end_0"), val = tensor([24, 1, 1500, 1024])]; tensor k_cache_end_mask_0 = const()[name = string("k_cache_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_squeeze_mask_0 = const()[name = string("k_cache_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_cast_fp16 = slice_by_index(begin = k_cache_begin_0, end = k_cache_end_0, end_mask = k_cache_end_mask_0, squeeze_mask = k_cache_squeeze_mask_0, x = read_state_2)[name = string("k_cache_cast_fp16")]; tensor v_cache_begin_0 = const()[name = string("v_cache_begin_0"), val = tensor([23, 0, 0, 0])]; tensor v_cache_end_0 = const()[name = string("v_cache_end_0"), val = tensor([24, 1, 1500, 1024])]; tensor v_cache_end_mask_0 = const()[name = string("v_cache_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_squeeze_mask_0 = const()[name = string("v_cache_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_cast_fp16 = slice_by_index(begin = v_cache_begin_0, end = v_cache_end_0, end_mask = v_cache_end_mask_0, squeeze_mask = v_cache_squeeze_mask_0, x = read_state_3)[name = string("v_cache_cast_fp16")]; int32 var_5001 = const()[name = string("op_5001"), val = int32(-1)]; tensor var_5019_axes_0 = const()[name = string("op_5019_axes_0"), val = tensor([-1])]; tensor blocks_23_attn_ln_weight_to_fp16 = const()[name = string("blocks_23_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(786680384)))]; tensor blocks_23_attn_ln_bias_to_fp16 = const()[name = string("blocks_23_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(786682496)))]; fp16 var_5007_to_fp16 = const()[name = string("op_5007_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_5019_cast_fp16 = layer_norm(axes = var_5019_axes_0, beta = blocks_23_attn_ln_bias_to_fp16, epsilon = var_5007_to_fp16, gamma = blocks_23_attn_ln_weight_to_fp16, x = x_417_cast_fp16)[name = string("op_5019_cast_fp16")]; tensor var_5030_to_fp16 = const()[name = string("op_5030_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(786684608)))]; tensor var_5031_to_fp16 = const()[name = string("op_5031_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(788781824)))]; tensor linear_184_cast_fp16 = linear(bias = var_5031_to_fp16, weight = var_5030_to_fp16, x = var_5019_cast_fp16)[name = string("linear_184_cast_fp16")]; tensor var_5034_to_fp16 = const()[name = string("op_5034_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(788783936)))]; tensor linear_185_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_5034_to_fp16, x = var_5019_cast_fp16)[name = string("linear_185_cast_fp16")]; tensor var_5038_to_fp16 = const()[name = string("op_5038_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(790881152)))]; tensor var_5039_to_fp16 = const()[name = string("op_5039_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(792978368)))]; tensor linear_186_cast_fp16 = linear(bias = var_5039_to_fp16, weight = var_5038_to_fp16, x = var_5019_cast_fp16)[name = string("linear_186_cast_fp16")]; tensor var_5041_shape_cast_fp16 = shape(x = linear_184_cast_fp16)[name = string("op_5041_shape_cast_fp16")]; int32 gather_278_axis_0 = const()[name = string("gather_278_axis_0"), val = int32(0)]; int32 gather_278_batch_dims_0 = const()[name = string("gather_278_batch_dims_0"), val = int32(0)]; bool gather_278_validate_indices_0 = const()[name = string("gather_278_validate_indices_0"), val = bool(false)]; string var_5041_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5041_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_278_to_uint16 = const()[name = string("select_278_to_uint16"), val = uint16(1)]; tensor var_5041_shape_cast_fp16_to_uint16 = cast(dtype = var_5041_shape_cast_fp16_to_uint16_dtype_0, x = var_5041_shape_cast_fp16)[name = string("cast_248")]; uint16 gather_278_cast_uint16 = gather(axis = gather_278_axis_0, batch_dims = gather_278_batch_dims_0, indices = select_278_to_uint16, validate_indices = gather_278_validate_indices_0, x = var_5041_shape_cast_fp16_to_uint16)[name = string("gather_278_cast_uint16")]; string gather_278_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_278_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_278_cast_uint16_to_int32 = cast(dtype = gather_278_cast_uint16_to_int32_dtype_0, x = gather_278_cast_uint16)[name = string("cast_247")]; int32 end_step = add(x = offset, y = gather_278_cast_uint16_to_int32)[name = string("end_step")]; tensor expand_dims_368 = const()[name = string("expand_dims_368"), val = tensor([0])]; tensor expand_dims_370 = const()[name = string("expand_dims_370"), val = tensor([0])]; tensor expand_dims_371_axes_0 = const()[name = string("expand_dims_371_axes_0"), val = tensor([0])]; tensor expand_dims_371 = expand_dims(axes = expand_dims_371_axes_0, x = end_step)[name = string("expand_dims_371")]; tensor concat_510_values0_0 = const()[name = string("concat_510_values0_0"), val = tensor([23])]; int32 concat_510_axis_0 = const()[name = string("concat_510_axis_0"), val = int32(0)]; bool concat_510_interleave_0 = const()[name = string("concat_510_interleave_0"), val = bool(false)]; tensor concat_510 = concat(axis = concat_510_axis_0, interleave = concat_510_interleave_0, values = (concat_510_values0_0, expand_dims_368, expand_dims_1, expand_dims_370))[name = string("concat_510")]; tensor concat_511_values0_0 = const()[name = string("concat_511_values0_0"), val = tensor([0])]; tensor concat_511_values1_0 = const()[name = string("concat_511_values1_0"), val = tensor([0])]; tensor concat_511_values3_0 = const()[name = string("concat_511_values3_0"), val = tensor([0])]; int32 concat_511_axis_0 = const()[name = string("concat_511_axis_0"), val = int32(0)]; bool concat_511_interleave_0 = const()[name = string("concat_511_interleave_0"), val = bool(false)]; tensor concat_511 = concat(axis = concat_511_axis_0, interleave = concat_511_interleave_0, values = (concat_511_values0_0, concat_511_values1_0, expand_dims_371, concat_511_values3_0))[name = string("concat_511")]; tensor k_cache1_internal_tensor_assign_24_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_24_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_24_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_510, begin_mask = k_cache1_internal_tensor_assign_24_begin_mask_0, end = concat_511, end_mask = k_cache1_internal_tensor_assign_24_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_24_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_24_stride_0, update = linear_185_cast_fp16, x = coreml_update_state_92)[name = string("k_cache1_internal_tensor_assign_24_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_24_cast_fp16, input = k_cache1)[name = string("coreml_update_state_94_write_state")]; tensor v_cache1_internal_tensor_assign_24_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_24_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_24_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_510, begin_mask = v_cache1_internal_tensor_assign_24_begin_mask_0, end = concat_511, end_mask = v_cache1_internal_tensor_assign_24_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_24_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_24_stride_0, update = linear_186_cast_fp16, x = coreml_update_state_93)[name = string("v_cache1_internal_tensor_assign_24_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_24_cast_fp16, input = v_cache1)[name = string("coreml_update_state_95_write_state")]; int32 concat_516_values0_0 = const()[name = string("concat_516_values0_0"), val = int32(1)]; int32 concat_516_values2_0 = const()[name = string("concat_516_values2_0"), val = int32(1024)]; int32 concat_516_axis_0 = const()[name = string("concat_516_axis_0"), val = int32(0)]; bool concat_516_interleave_0 = const()[name = string("concat_516_interleave_0"), val = bool(false)]; tensor concat_516 = concat(axis = concat_516_axis_0, interleave = concat_516_interleave_0, values = (concat_516_values0_0, end_step, concat_516_values2_0))[name = string("concat_516")]; tensor var_5057_begin_0 = const()[name = string("op_5057_begin_0"), val = tensor([0, 0, 0])]; tensor var_5057_end_mask_0 = const()[name = string("op_5057_end_mask_0"), val = tensor([true, false, true])]; tensor var_5057_cast_fp16 = slice_by_index(begin = var_5057_begin_0, end = concat_516, end_mask = var_5057_end_mask_0, x = k_cache_93_cast_fp16)[name = string("op_5057_cast_fp16")]; tensor var_5060_begin_0 = const()[name = string("op_5060_begin_0"), val = tensor([0, 0, 0])]; tensor var_5060_end_mask_0 = const()[name = string("op_5060_end_mask_0"), val = tensor([true, false, true])]; tensor var_5060_cast_fp16 = slice_by_index(begin = var_5060_begin_0, end = concat_516, end_mask = var_5060_end_mask_0, x = v_cache_93_cast_fp16)[name = string("op_5060_cast_fp16")]; tensor concat_518x = const()[name = string("concat_518x"), val = tensor([1, -1, 16, 64])]; tensor var_5070_cast_fp16 = reshape(shape = concat_518x, x = linear_184_cast_fp16)[name = string("op_5070_cast_fp16")]; tensor const_212_to_fp16 = const()[name = string("const_212_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_187_cast_fp16 = mul(x = var_5070_cast_fp16, y = const_212_to_fp16)[name = string("q_187_cast_fp16")]; tensor concat_519x = const()[name = string("concat_519x"), val = tensor([1, -1, 16, 64])]; tensor var_5077_cast_fp16 = reshape(shape = concat_519x, x = var_5057_cast_fp16)[name = string("op_5077_cast_fp16")]; tensor const_213_to_fp16 = const()[name = string("const_213_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_235_cast_fp16 = mul(x = var_5077_cast_fp16, y = const_213_to_fp16)[name = string("k_235_cast_fp16")]; tensor concat_520x = const()[name = string("concat_520x"), val = tensor([1, -1, 16, 64])]; tensor var_5084_cast_fp16 = reshape(shape = concat_520x, x = var_5060_cast_fp16)[name = string("op_5084_cast_fp16")]; tensor var_5085 = const()[name = string("op_5085"), val = tensor([0, 2, 1, 3])]; bool qk_139_transpose_x_0 = const()[name = string("qk_139_transpose_x_0"), val = bool(false)]; bool qk_139_transpose_y_0 = const()[name = string("qk_139_transpose_y_0"), val = bool(false)]; tensor transpose_285_perm_0 = const()[name = string("transpose_285_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_286_perm_0 = const()[name = string("transpose_286_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_286 = transpose(perm = transpose_286_perm_0, x = k_235_cast_fp16)[name = string("transpose_294")]; tensor transpose_285 = transpose(perm = transpose_285_perm_0, x = q_187_cast_fp16)[name = string("transpose_295")]; tensor qk_139_cast_fp16 = matmul(transpose_x = qk_139_transpose_x_0, transpose_y = qk_139_transpose_y_0, x = transpose_285, y = transpose_286)[name = string("qk_139_cast_fp16")]; int32 concat_521_values1_0 = const()[name = string("concat_521_values1_0"), val = int32(448)]; int32 concat_521_axis_0 = const()[name = string("concat_521_axis_0"), val = int32(0)]; bool concat_521_interleave_0 = const()[name = string("concat_521_interleave_0"), val = bool(false)]; tensor concat_521 = concat(axis = concat_521_axis_0, interleave = concat_521_interleave_0, values = (gather_278_cast_uint16_to_int32, concat_521_values1_0))[name = string("concat_521")]; tensor var_5088_begin_0 = const()[name = string("op_5088_begin_0"), val = tensor([0, 0])]; tensor var_5088_end_mask_0 = const()[name = string("op_5088_end_mask_0"), val = tensor([false, true])]; tensor var_5088_cast_fp16 = slice_by_index(begin = var_5088_begin_0, end = concat_521, end_mask = var_5088_end_mask_0, x = mask_to_fp16)[name = string("op_5088_cast_fp16")]; int32 concat_522_values0_0 = const()[name = string("concat_522_values0_0"), val = int32(0)]; int32 concat_522_axis_0 = const()[name = string("concat_522_axis_0"), val = int32(0)]; bool concat_522_interleave_0 = const()[name = string("concat_522_interleave_0"), val = bool(false)]; tensor concat_522 = concat(axis = concat_522_axis_0, interleave = concat_522_interleave_0, values = (concat_522_values0_0, gather_278_cast_uint16_to_int32))[name = string("concat_522")]; tensor var_5089_begin_0 = const()[name = string("op_5089_begin_0"), val = tensor([0, 0])]; tensor var_5089_end_mask_0 = const()[name = string("op_5089_end_mask_0"), val = tensor([true, false])]; tensor var_5089_cast_fp16 = slice_by_index(begin = var_5089_begin_0, end = concat_522, end_mask = var_5089_end_mask_0, x = var_5088_cast_fp16)[name = string("op_5089_cast_fp16")]; tensor qk_141_cast_fp16 = add(x = qk_139_cast_fp16, y = var_5089_cast_fp16)[name = string("qk_141_cast_fp16")]; tensor var_5092_cast_fp16 = softmax(axis = var_5001, x = qk_141_cast_fp16)[name = string("op_5092_cast_fp16")]; bool var_5094_transpose_x_0 = const()[name = string("op_5094_transpose_x_0"), val = bool(false)]; bool var_5094_transpose_y_0 = const()[name = string("op_5094_transpose_y_0"), val = bool(false)]; tensor v_235_cast_fp16 = transpose(perm = var_5085, x = var_5084_cast_fp16)[name = string("transpose_296")]; tensor var_5094_cast_fp16 = matmul(transpose_x = var_5094_transpose_x_0, transpose_y = var_5094_transpose_y_0, x = var_5092_cast_fp16, y = v_235_cast_fp16)[name = string("op_5094_cast_fp16")]; tensor var_5095 = const()[name = string("op_5095"), val = tensor([0, 2, 1, 3])]; tensor concat_523x = const()[name = string("concat_523x"), val = tensor([1, -1, 1024])]; tensor var_5096_cast_fp16 = transpose(perm = var_5095, x = var_5094_cast_fp16)[name = string("transpose_293")]; tensor x_421_cast_fp16 = reshape(shape = concat_523x, x = var_5096_cast_fp16)[name = string("x_421_cast_fp16")]; tensor var_5100_to_fp16 = const()[name = string("op_5100_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(792980480)))]; tensor var_5101_to_fp16 = const()[name = string("op_5101_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(795077696)))]; tensor linear_187_cast_fp16 = linear(bias = var_5101_to_fp16, weight = var_5100_to_fp16, x = x_421_cast_fp16)[name = string("linear_187_cast_fp16")]; tensor x_423_cast_fp16 = add(x = x_417_cast_fp16, y = linear_187_cast_fp16)[name = string("x_423_cast_fp16")]; tensor var_5108_axes_0 = const()[name = string("op_5108_axes_0"), val = tensor([-1])]; tensor blocks_23_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_23_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(795079808)))]; tensor blocks_23_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_23_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(795081920)))]; tensor var_5108_cast_fp16 = layer_norm(axes = var_5108_axes_0, beta = blocks_23_cross_attn_ln_bias_to_fp16, epsilon = var_5007_to_fp16, gamma = blocks_23_cross_attn_ln_weight_to_fp16, x = x_423_cast_fp16)[name = string("op_5108_cast_fp16")]; tensor var_5117_to_fp16 = const()[name = string("op_5117_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(795084032)))]; tensor var_5118_to_fp16 = const()[name = string("op_5118_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(797181248)))]; tensor linear_188_cast_fp16 = linear(bias = var_5118_to_fp16, weight = var_5117_to_fp16, x = var_5108_cast_fp16)[name = string("linear_188_cast_fp16")]; tensor concat_524 = const()[name = string("concat_524"), val = tensor([0, 0, 0])]; tensor concat_525 = const()[name = string("concat_525"), val = tensor([0, 1500, 0])]; tensor k_237_internal_tensor_assign_1_stride_0 = const()[name = string("k_237_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_237_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_237_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_237_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_237_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_237_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_237_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_237_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_524, begin_mask = k_237_internal_tensor_assign_1_begin_mask_0, end = concat_525, end_mask = k_237_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_237_internal_tensor_assign_1_squeeze_mask_0, stride = k_237_internal_tensor_assign_1_stride_0, update = k_cache_cast_fp16, x = k_7_to_fp16)[name = string("k_237_internal_tensor_assign_1_cast_fp16")]; tensor concat_526 = const()[name = string("concat_526"), val = tensor([0, 0, 0])]; tensor concat_527 = const()[name = string("concat_527"), val = tensor([0, 1500, 0])]; tensor v_237_internal_tensor_assign_1_stride_0 = const()[name = string("v_237_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_237_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_237_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_237_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_237_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_237_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_237_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_237_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_526, begin_mask = v_237_internal_tensor_assign_1_begin_mask_0, end = concat_527, end_mask = v_237_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_237_internal_tensor_assign_1_squeeze_mask_0, stride = v_237_internal_tensor_assign_1_stride_0, update = v_cache_cast_fp16, x = k_7_to_fp16)[name = string("v_237_internal_tensor_assign_1_cast_fp16")]; tensor concat_528x = const()[name = string("concat_528x"), val = tensor([1, -1, 16, 64])]; tensor var_5138_cast_fp16 = reshape(shape = concat_528x, x = linear_188_cast_fp16)[name = string("op_5138_cast_fp16")]; tensor const_214_to_fp16 = const()[name = string("const_214_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_cast_fp16 = mul(x = var_5138_cast_fp16, y = const_214_to_fp16)[name = string("q_cast_fp16")]; tensor var_5144 = const()[name = string("op_5144"), val = tensor([1, 1500, 16, -1])]; tensor var_5145_cast_fp16 = reshape(shape = var_5144, x = k_237_internal_tensor_assign_1_cast_fp16)[name = string("op_5145_cast_fp16")]; tensor const_215_to_fp16 = const()[name = string("const_215_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_cast_fp16 = mul(x = var_5145_cast_fp16, y = const_215_to_fp16)[name = string("k_cast_fp16")]; tensor var_5151 = const()[name = string("op_5151"), val = tensor([1, 1500, 16, -1])]; tensor var_5152_cast_fp16 = reshape(shape = var_5151, x = v_237_internal_tensor_assign_1_cast_fp16)[name = string("op_5152_cast_fp16")]; tensor var_5153 = const()[name = string("op_5153"), val = tensor([0, 2, 1, 3])]; bool qk_transpose_x_0 = const()[name = string("qk_transpose_x_0"), val = bool(false)]; bool qk_transpose_y_0 = const()[name = string("qk_transpose_y_0"), val = bool(false)]; tensor transpose_287_perm_0 = const()[name = string("transpose_287_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_288_perm_0 = const()[name = string("transpose_288_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_288 = transpose(perm = transpose_288_perm_0, x = k_cast_fp16)[name = string("transpose_290")]; tensor transpose_287 = transpose(perm = transpose_287_perm_0, x = q_cast_fp16)[name = string("transpose_291")]; tensor qk_cast_fp16 = matmul(transpose_x = qk_transpose_x_0, transpose_y = qk_transpose_y_0, x = transpose_287, y = transpose_288)[name = string("qk_cast_fp16")]; tensor var_5157_cast_fp16 = softmax(axis = var_5001, x = qk_cast_fp16)[name = string("op_5157_cast_fp16")]; bool var_5159_transpose_x_0 = const()[name = string("op_5159_transpose_x_0"), val = bool(false)]; bool var_5159_transpose_y_0 = const()[name = string("op_5159_transpose_y_0"), val = bool(false)]; tensor v_cast_fp16 = transpose(perm = var_5153, x = var_5152_cast_fp16)[name = string("transpose_292")]; tensor var_5159_cast_fp16 = matmul(transpose_x = var_5159_transpose_x_0, transpose_y = var_5159_transpose_y_0, x = var_5157_cast_fp16, y = v_cast_fp16)[name = string("op_5159_cast_fp16")]; tensor var_5160 = const()[name = string("op_5160"), val = tensor([0, 2, 1, 3])]; tensor concat_529x = const()[name = string("concat_529x"), val = tensor([1, -1, 1024])]; tensor var_5161_cast_fp16 = transpose(perm = var_5160, x = var_5159_cast_fp16)[name = string("transpose_289")]; tensor x_427_cast_fp16 = reshape(shape = concat_529x, x = var_5161_cast_fp16)[name = string("x_427_cast_fp16")]; tensor var_5165_to_fp16 = const()[name = string("op_5165_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(797183360)))]; tensor var_5166_to_fp16 = const()[name = string("op_5166_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(799280576)))]; tensor linear_189_cast_fp16 = linear(bias = var_5166_to_fp16, weight = var_5165_to_fp16, x = x_427_cast_fp16)[name = string("linear_189_cast_fp16")]; tensor x_429_cast_fp16 = add(x = x_423_cast_fp16, y = linear_189_cast_fp16)[name = string("x_429_cast_fp16")]; tensor var_5173_axes_0 = const()[name = string("op_5173_axes_0"), val = tensor([-1])]; tensor blocks_23_mlp_ln_weight_to_fp16 = const()[name = string("blocks_23_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(799282688)))]; tensor blocks_23_mlp_ln_bias_to_fp16 = const()[name = string("blocks_23_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(799284800)))]; tensor var_5173_cast_fp16 = layer_norm(axes = var_5173_axes_0, beta = blocks_23_mlp_ln_bias_to_fp16, epsilon = var_5007_to_fp16, gamma = blocks_23_mlp_ln_weight_to_fp16, x = x_429_cast_fp16)[name = string("op_5173_cast_fp16")]; tensor var_5182_to_fp16 = const()[name = string("op_5182_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(799286912)))]; tensor var_5183_to_fp16 = const()[name = string("op_5183_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(807675584)))]; tensor linear_190_cast_fp16 = linear(bias = var_5183_to_fp16, weight = var_5182_to_fp16, x = var_5173_cast_fp16)[name = string("linear_190_cast_fp16")]; string x_433_mode_0 = const()[name = string("x_433_mode_0"), val = string("EXACT")]; tensor x_433_cast_fp16 = gelu(mode = x_433_mode_0, x = linear_190_cast_fp16)[name = string("x_433_cast_fp16")]; tensor var_5188_to_fp16 = const()[name = string("op_5188_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(807683840)))]; tensor var_5189_to_fp16 = const()[name = string("op_5189_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(816072512)))]; tensor linear_191_cast_fp16 = linear(bias = var_5189_to_fp16, weight = var_5188_to_fp16, x = x_433_cast_fp16)[name = string("linear_191_cast_fp16")]; tensor x_435_cast_fp16 = add(x = x_429_cast_fp16, y = linear_191_cast_fp16)[name = string("x_435_cast_fp16")]; tensor var_5202_axes_0 = const()[name = string("op_5202_axes_0"), val = tensor([-1])]; tensor ln_weight_to_fp16 = const()[name = string("ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(816074624)))]; tensor ln_bias_to_fp16 = const()[name = string("ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(816076736)))]; fp16 var_5193_to_fp16 = const()[name = string("op_5193_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_5202_cast_fp16 = layer_norm(axes = var_5202_axes_0, beta = ln_bias_to_fp16, epsilon = var_5193_to_fp16, gamma = ln_weight_to_fp16, x = x_435_cast_fp16)[name = string("op_5202_cast_fp16")]; tensor var_5212_bias_0_to_fp16 = const()[name = string("op_5212_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(816078848)))]; tensor logits = linear(bias = var_5212_bias_0_to_fp16, weight = token_embedding_weight_to_fp16, x = var_5202_cast_fp16)[name = string("op_5212_cast_fp16")]; } -> (logits); }