program(1.3) [buildInfo = dict({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}, {"coremltools-component-torch", "2.4.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0"}})] { func main(state> k_cache1, state> k_cache2, tensor offset_mask, tensor token_data, state> v_cache1, state> v_cache2) [FlexibleShapeInformation = tuple>>, tuple, ?>>>>((("DefaultShapes", {{"offset_mask", [1, 1]}, {"token_data", [1, 1]}}), ("RangeDims", {{"offset_mask", [[1, 1], [1, 448]]}, {"token_data", [[1, 1], [1, 448]]}})))] { tensor var_78_shape_cast_fp16 = shape(x = offset_mask)[name = string("op_78_shape_cast_fp16")]; int32 gather_0_axis_0 = const()[name = string("gather_0_axis_0"), val = int32(0)]; int32 gather_0_batch_dims_0 = const()[name = string("gather_0_batch_dims_0"), val = int32(0)]; bool gather_0_validate_indices_0 = const()[name = string("gather_0_validate_indices_0"), val = bool(false)]; string var_78_shape_cast_fp16_to_int16_dtype_0 = const()[name = string("op_78_shape_cast_fp16_to_int16_dtype_0"), val = string("int16")]; uint16 select_0_to_uint16 = const()[name = string("select_0_to_uint16"), val = uint16(1)]; tensor var_78_shape_cast_fp16_to_int16 = cast(dtype = var_78_shape_cast_fp16_to_int16_dtype_0, x = var_78_shape_cast_fp16)[name = string("cast_394")]; int16 gather_0_cast_uint16 = gather(axis = gather_0_axis_0, batch_dims = gather_0_batch_dims_0, indices = select_0_to_uint16, validate_indices = gather_0_validate_indices_0, x = var_78_shape_cast_fp16_to_int16)[name = string("gather_0_cast_uint16")]; string gather_0_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_0_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor var_82_shape = shape(x = token_data)[name = string("op_82_shape")]; int32 gather_1_axis_0 = const()[name = string("gather_1_axis_0"), val = int32(0)]; int32 gather_1_batch_dims_0 = const()[name = string("gather_1_batch_dims_0"), val = int32(0)]; bool gather_1_validate_indices_0 = const()[name = string("gather_1_validate_indices_0"), val = bool(false)]; string var_82_shape_to_uint16_dtype_0 = const()[name = string("op_82_shape_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_1_to_uint16 = const()[name = string("select_1_to_uint16"), val = uint16(1)]; tensor var_82_shape_to_uint16 = cast(dtype = var_82_shape_to_uint16_dtype_0, x = var_82_shape)[name = string("cast_392")]; uint16 gather_1_cast_uint16 = gather(axis = gather_1_axis_0, batch_dims = gather_1_batch_dims_0, indices = select_1_to_uint16, validate_indices = gather_1_validate_indices_0, x = var_82_shape_to_uint16)[name = string("gather_1_cast_uint16")]; string gather_1_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_1_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_1_cast_uint16_to_int32 = cast(dtype = gather_1_cast_uint16_to_int32_dtype_0, x = gather_1_cast_uint16)[name = string("cast_391")]; int32 gather_0_cast_uint16_to_int32 = cast(dtype = gather_0_cast_uint16_to_int32_dtype_0, x = gather_0_cast_uint16)[name = string("cast_393")]; int32 offset = sub(x = gather_0_cast_uint16_to_int32, y = gather_1_cast_uint16_to_int32)[name = string("offset")]; int32 var_154_axis_0 = const()[name = string("op_154_axis_0"), val = int32(0)]; int32 var_154_batch_dims_0 = const()[name = string("op_154_batch_dims_0"), val = int32(0)]; bool var_154_validate_indices_0 = const()[name = string("op_154_validate_indices_0"), val = bool(false)]; tensor token_embedding_weight_to_fp16 = const()[name = string("token_embedding_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; tensor var_154_cast_fp16 = gather(axis = var_154_axis_0, batch_dims = var_154_batch_dims_0, indices = token_data, validate_indices = var_154_validate_indices_0, x = token_embedding_weight_to_fp16)[name = string("op_154_cast_fp16")]; int32 concat_0_values1_0 = const()[name = string("concat_0_values1_0"), val = int32(0)]; int32 concat_0_axis_0 = const()[name = string("concat_0_axis_0"), val = int32(0)]; bool concat_0_interleave_0 = const()[name = string("concat_0_interleave_0"), val = bool(false)]; tensor concat_0 = concat(axis = concat_0_axis_0, interleave = concat_0_interleave_0, values = (offset, concat_0_values1_0))[name = string("concat_0")]; int32 concat_1_values1_0 = const()[name = string("concat_1_values1_0"), val = int32(1280)]; int32 concat_1_axis_0 = const()[name = string("concat_1_axis_0"), val = int32(0)]; bool concat_1_interleave_0 = const()[name = string("concat_1_interleave_0"), val = bool(false)]; tensor concat_1 = concat(axis = concat_1_axis_0, interleave = concat_1_interleave_0, values = (gather_0_cast_uint16_to_int32, concat_1_values1_0))[name = string("concat_1")]; tensor var_157_end_mask_0 = const()[name = string("op_157_end_mask_0"), val = tensor([false, true])]; tensor positional_embedding_to_fp16 = const()[name = string("positional_embedding_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132777088)))]; tensor var_157_cast_fp16 = slice_by_index(begin = concat_0, end = concat_1, end_mask = var_157_end_mask_0, x = positional_embedding_to_fp16)[name = string("op_157_cast_fp16")]; tensor x_3_cast_fp16 = add(x = var_154_cast_fp16, y = var_157_cast_fp16)[name = string("x_3_cast_fp16")]; tensor read_state_0 = read_state(input = k_cache1)[name = string("read_state_0")]; tensor k_cache_1_begin_0 = const()[name = string("k_cache_1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor k_cache_1_end_0 = const()[name = string("k_cache_1_end_0"), val = tensor([1, 1, 448, 1280])]; tensor k_cache_1_end_mask_0 = const()[name = string("k_cache_1_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_1_squeeze_mask_0 = const()[name = string("k_cache_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_1_cast_fp16 = slice_by_index(begin = k_cache_1_begin_0, end = k_cache_1_end_0, end_mask = k_cache_1_end_mask_0, squeeze_mask = k_cache_1_squeeze_mask_0, x = read_state_0)[name = string("k_cache_1_cast_fp16")]; tensor read_state_1 = read_state(input = v_cache1)[name = string("read_state_1")]; tensor v_cache_1_begin_0 = const()[name = string("v_cache_1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor v_cache_1_end_0 = const()[name = string("v_cache_1_end_0"), val = tensor([1, 1, 448, 1280])]; tensor v_cache_1_end_mask_0 = const()[name = string("v_cache_1_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_1_squeeze_mask_0 = const()[name = string("v_cache_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_1_cast_fp16 = slice_by_index(begin = v_cache_1_begin_0, end = v_cache_1_end_0, end_mask = v_cache_1_end_mask_0, squeeze_mask = v_cache_1_squeeze_mask_0, x = read_state_1)[name = string("v_cache_1_cast_fp16")]; tensor read_state_2 = read_state(input = k_cache2)[name = string("read_state_2")]; tensor k_cache_3_begin_0 = const()[name = string("k_cache_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor k_cache_3_end_0 = const()[name = string("k_cache_3_end_0"), val = tensor([1, 1, 1500, 1280])]; tensor k_cache_3_end_mask_0 = const()[name = string("k_cache_3_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_3_squeeze_mask_0 = const()[name = string("k_cache_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_3_cast_fp16 = slice_by_index(begin = k_cache_3_begin_0, end = k_cache_3_end_0, end_mask = k_cache_3_end_mask_0, squeeze_mask = k_cache_3_squeeze_mask_0, x = read_state_2)[name = string("k_cache_3_cast_fp16")]; tensor read_state_3 = read_state(input = v_cache2)[name = string("read_state_3")]; tensor v_cache_3_begin_0 = const()[name = string("v_cache_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor v_cache_3_end_0 = const()[name = string("v_cache_3_end_0"), val = tensor([1, 1, 1500, 1280])]; tensor v_cache_3_end_mask_0 = const()[name = string("v_cache_3_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_3_squeeze_mask_0 = const()[name = string("v_cache_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_3_cast_fp16 = slice_by_index(begin = v_cache_3_begin_0, end = v_cache_3_end_0, end_mask = v_cache_3_end_mask_0, squeeze_mask = v_cache_3_squeeze_mask_0, x = read_state_3)[name = string("v_cache_3_cast_fp16")]; int32 var_180 = const()[name = string("op_180"), val = int32(-1)]; tensor var_198_axes_0 = const()[name = string("op_198_axes_0"), val = tensor([-1])]; tensor blocks_0_attn_ln_weight_to_fp16 = const()[name = string("blocks_0_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133924032)))]; tensor blocks_0_attn_ln_bias_to_fp16 = const()[name = string("blocks_0_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133926656)))]; fp16 var_186_to_fp16 = const()[name = string("op_186_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_198_cast_fp16 = layer_norm(axes = var_198_axes_0, beta = blocks_0_attn_ln_bias_to_fp16, epsilon = var_186_to_fp16, gamma = blocks_0_attn_ln_weight_to_fp16, x = x_3_cast_fp16)[name = string("op_198_cast_fp16")]; tensor var_209_to_fp16 = const()[name = string("op_209_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133929280)))]; tensor var_210_to_fp16 = const()[name = string("op_210_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137206144)))]; tensor linear_0_cast_fp16 = linear(bias = var_210_to_fp16, weight = var_209_to_fp16, x = var_198_cast_fp16)[name = string("linear_0_cast_fp16")]; tensor var_213_to_fp16 = const()[name = string("op_213_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137208768)))]; tensor linear_1_bias_0_to_fp16 = const()[name = string("linear_1_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140485632)))]; tensor linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_213_to_fp16, x = var_198_cast_fp16)[name = string("linear_1_cast_fp16")]; tensor var_217_to_fp16 = const()[name = string("op_217_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140488256)))]; tensor var_218_to_fp16 = const()[name = string("op_218_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143765120)))]; tensor linear_2_cast_fp16 = linear(bias = var_218_to_fp16, weight = var_217_to_fp16, x = var_198_cast_fp16)[name = string("linear_2_cast_fp16")]; tensor var_220_shape_cast_fp16 = shape(x = linear_0_cast_fp16)[name = string("op_220_shape_cast_fp16")]; int32 gather_2_axis_0 = const()[name = string("gather_2_axis_0"), val = int32(0)]; int32 gather_2_batch_dims_0 = const()[name = string("gather_2_batch_dims_0"), val = int32(0)]; bool gather_2_validate_indices_0 = const()[name = string("gather_2_validate_indices_0"), val = bool(false)]; string var_220_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_220_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_2_to_uint16 = const()[name = string("select_2_to_uint16"), val = uint16(1)]; tensor var_220_shape_cast_fp16_to_uint16 = cast(dtype = var_220_shape_cast_fp16_to_uint16_dtype_0, x = var_220_shape_cast_fp16)[name = string("cast_390")]; uint16 gather_2_cast_uint16 = gather(axis = gather_2_axis_0, batch_dims = gather_2_batch_dims_0, indices = select_2_to_uint16, validate_indices = gather_2_validate_indices_0, x = var_220_shape_cast_fp16_to_uint16)[name = string("gather_2_cast_uint16")]; string gather_2_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_2_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_2_cast_uint16_to_int32 = cast(dtype = gather_2_cast_uint16_to_int32_dtype_0, x = gather_2_cast_uint16)[name = string("cast_389")]; int32 end_step_3 = add(x = offset, y = gather_2_cast_uint16_to_int32)[name = string("end_step_3")]; tensor expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor([0])]; tensor expand_dims_1_axes_0 = const()[name = string("expand_dims_1_axes_0"), val = tensor([0])]; tensor expand_dims_1 = expand_dims(axes = expand_dims_1_axes_0, x = offset)[name = string("expand_dims_1")]; tensor expand_dims_2 = const()[name = string("expand_dims_2"), val = tensor([0])]; tensor expand_dims_3_axes_0 = const()[name = string("expand_dims_3_axes_0"), val = tensor([0])]; tensor expand_dims_3 = expand_dims(axes = expand_dims_3_axes_0, x = end_step_3)[name = string("expand_dims_3")]; tensor concat_4_values0_0 = const()[name = string("concat_4_values0_0"), val = tensor([0])]; int32 concat_4_axis_0 = const()[name = string("concat_4_axis_0"), val = int32(0)]; bool concat_4_interleave_0 = const()[name = string("concat_4_interleave_0"), val = bool(false)]; tensor concat_4 = concat(axis = concat_4_axis_0, interleave = concat_4_interleave_0, values = (concat_4_values0_0, expand_dims_0, expand_dims_1, expand_dims_2))[name = string("concat_4")]; tensor concat_5_values0_0 = const()[name = string("concat_5_values0_0"), val = tensor([0])]; tensor concat_5_values1_0 = const()[name = string("concat_5_values1_0"), val = tensor([0])]; tensor concat_5_values3_0 = const()[name = string("concat_5_values3_0"), val = tensor([0])]; int32 concat_5_axis_0 = const()[name = string("concat_5_axis_0"), val = int32(0)]; bool concat_5_interleave_0 = const()[name = string("concat_5_interleave_0"), val = bool(false)]; tensor concat_5 = concat(axis = concat_5_axis_0, interleave = concat_5_interleave_0, values = (concat_5_values0_0, concat_5_values1_0, expand_dims_3, concat_5_values3_0))[name = string("concat_5")]; tensor k_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_4, begin_mask = k_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_5, end_mask = k_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_1_stride_0, update = linear_1_cast_fp16, x = read_state_0)[name = string("k_cache1_internal_tensor_assign_1_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_1_cast_fp16, input = k_cache1)[name = string("coreml_update_state_64_write_state")]; tensor coreml_update_state_64 = read_state(input = k_cache1)[name = string("coreml_update_state_64")]; tensor v_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_4, begin_mask = v_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_5, end_mask = v_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_1_stride_0, update = linear_2_cast_fp16, x = read_state_1)[name = string("v_cache1_internal_tensor_assign_1_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_1_cast_fp16, input = v_cache1)[name = string("coreml_update_state_65_write_state")]; tensor coreml_update_state_65 = read_state(input = v_cache1)[name = string("coreml_update_state_65")]; int32 concat_10_values0_0 = const()[name = string("concat_10_values0_0"), val = int32(1)]; int32 concat_10_values2_0 = const()[name = string("concat_10_values2_0"), val = int32(1280)]; int32 concat_10_axis_0 = const()[name = string("concat_10_axis_0"), val = int32(0)]; bool concat_10_interleave_0 = const()[name = string("concat_10_interleave_0"), val = bool(false)]; tensor concat_10 = concat(axis = concat_10_axis_0, interleave = concat_10_interleave_0, values = (concat_10_values0_0, end_step_3, concat_10_values2_0))[name = string("concat_10")]; tensor var_236_begin_0 = const()[name = string("op_236_begin_0"), val = tensor([0, 0, 0])]; tensor var_236_end_mask_0 = const()[name = string("op_236_end_mask_0"), val = tensor([true, false, true])]; tensor var_236_cast_fp16 = slice_by_index(begin = var_236_begin_0, end = concat_10, end_mask = var_236_end_mask_0, x = k_cache_1_cast_fp16)[name = string("op_236_cast_fp16")]; tensor var_239_begin_0 = const()[name = string("op_239_begin_0"), val = tensor([0, 0, 0])]; tensor var_239_end_mask_0 = const()[name = string("op_239_end_mask_0"), val = tensor([true, false, true])]; tensor var_239_cast_fp16 = slice_by_index(begin = var_239_begin_0, end = concat_10, end_mask = var_239_end_mask_0, x = v_cache_1_cast_fp16)[name = string("op_239_cast_fp16")]; tensor concat_12x = const()[name = string("concat_12x"), val = tensor([1, -1, 20, 64])]; tensor var_249_cast_fp16 = reshape(shape = concat_12x, x = linear_0_cast_fp16)[name = string("op_249_cast_fp16")]; tensor const_160_to_fp16 = const()[name = string("const_160_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_3_cast_fp16 = mul(x = var_249_cast_fp16, y = const_160_to_fp16)[name = string("q_3_cast_fp16")]; tensor concat_13x = const()[name = string("concat_13x"), val = tensor([1, -1, 20, 64])]; tensor var_256_cast_fp16 = reshape(shape = concat_13x, x = var_236_cast_fp16)[name = string("op_256_cast_fp16")]; tensor const_161_to_fp16 = const()[name = string("const_161_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_5_cast_fp16 = mul(x = var_256_cast_fp16, y = const_161_to_fp16)[name = string("k_5_cast_fp16")]; tensor concat_14x = const()[name = string("concat_14x"), val = tensor([1, -1, 20, 64])]; tensor var_263_cast_fp16 = reshape(shape = concat_14x, x = var_239_cast_fp16)[name = string("op_263_cast_fp16")]; tensor var_264 = const()[name = string("op_264"), val = tensor([0, 2, 1, 3])]; bool qk_1_transpose_x_0 = const()[name = string("qk_1_transpose_x_0"), val = bool(false)]; bool qk_1_transpose_y_0 = const()[name = string("qk_1_transpose_y_0"), val = bool(false)]; tensor transpose_257_perm_0 = const()[name = string("transpose_257_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_258_perm_0 = const()[name = string("transpose_258_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_258 = transpose(perm = transpose_258_perm_0, x = k_5_cast_fp16)[name = string("transpose_638")]; tensor transpose_257 = transpose(perm = transpose_257_perm_0, x = q_3_cast_fp16)[name = string("transpose_639")]; tensor qk_1_cast_fp16 = matmul(transpose_x = qk_1_transpose_x_0, transpose_y = qk_1_transpose_y_0, x = transpose_257, y = transpose_258)[name = string("qk_1_cast_fp16")]; int32 concat_15_values1_0 = const()[name = string("concat_15_values1_0"), val = int32(448)]; int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)]; bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)]; tensor concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (gather_2_cast_uint16_to_int32, concat_15_values1_0))[name = string("concat_15")]; tensor var_267_begin_0 = const()[name = string("op_267_begin_0"), val = tensor([0, 0])]; tensor var_267_end_mask_0 = const()[name = string("op_267_end_mask_0"), val = tensor([false, true])]; tensor mask_to_fp16 = const()[name = string("mask_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143767744)))]; tensor var_267_cast_fp16 = slice_by_index(begin = var_267_begin_0, end = concat_15, end_mask = var_267_end_mask_0, x = mask_to_fp16)[name = string("op_267_cast_fp16")]; int32 concat_16_values0_0 = const()[name = string("concat_16_values0_0"), val = int32(0)]; int32 concat_16_axis_0 = const()[name = string("concat_16_axis_0"), val = int32(0)]; bool concat_16_interleave_0 = const()[name = string("concat_16_interleave_0"), val = bool(false)]; tensor concat_16 = concat(axis = concat_16_axis_0, interleave = concat_16_interleave_0, values = (concat_16_values0_0, gather_2_cast_uint16_to_int32))[name = string("concat_16")]; tensor var_268_begin_0 = const()[name = string("op_268_begin_0"), val = tensor([0, 0])]; tensor var_268_end_mask_0 = const()[name = string("op_268_end_mask_0"), val = tensor([true, false])]; tensor var_268_cast_fp16 = slice_by_index(begin = var_268_begin_0, end = concat_16, end_mask = var_268_end_mask_0, x = var_267_cast_fp16)[name = string("op_268_cast_fp16")]; tensor qk_3_cast_fp16 = add(x = qk_1_cast_fp16, y = var_268_cast_fp16)[name = string("qk_3_cast_fp16")]; tensor var_271_cast_fp16 = softmax(axis = var_180, x = qk_3_cast_fp16)[name = string("op_271_cast_fp16")]; bool var_273_transpose_x_0 = const()[name = string("op_273_transpose_x_0"), val = bool(false)]; bool var_273_transpose_y_0 = const()[name = string("op_273_transpose_y_0"), val = bool(false)]; tensor v_5_cast_fp16 = transpose(perm = var_264, x = var_263_cast_fp16)[name = string("transpose_640")]; tensor var_273_cast_fp16 = matmul(transpose_x = var_273_transpose_x_0, transpose_y = var_273_transpose_y_0, x = var_271_cast_fp16, y = v_5_cast_fp16)[name = string("op_273_cast_fp16")]; tensor var_274 = const()[name = string("op_274"), val = tensor([0, 2, 1, 3])]; tensor concat_17x = const()[name = string("concat_17x"), val = tensor([1, -1, 1280])]; tensor var_275_cast_fp16 = transpose(perm = var_274, x = var_273_cast_fp16)[name = string("transpose_637")]; tensor x_7_cast_fp16 = reshape(shape = concat_17x, x = var_275_cast_fp16)[name = string("x_7_cast_fp16")]; tensor var_279_to_fp16 = const()[name = string("op_279_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144169216)))]; tensor var_280_to_fp16 = const()[name = string("op_280_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147446080)))]; tensor linear_3_cast_fp16 = linear(bias = var_280_to_fp16, weight = var_279_to_fp16, x = x_7_cast_fp16)[name = string("linear_3_cast_fp16")]; tensor x_9_cast_fp16 = add(x = x_3_cast_fp16, y = linear_3_cast_fp16)[name = string("x_9_cast_fp16")]; tensor var_287_axes_0 = const()[name = string("op_287_axes_0"), val = tensor([-1])]; tensor blocks_0_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_0_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147448704)))]; tensor blocks_0_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_0_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147451328)))]; tensor var_287_cast_fp16 = layer_norm(axes = var_287_axes_0, beta = blocks_0_cross_attn_ln_bias_to_fp16, epsilon = var_186_to_fp16, gamma = blocks_0_cross_attn_ln_weight_to_fp16, x = x_9_cast_fp16)[name = string("op_287_cast_fp16")]; tensor var_296_to_fp16 = const()[name = string("op_296_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147453952)))]; tensor var_297_to_fp16 = const()[name = string("op_297_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(150730816)))]; tensor linear_4_cast_fp16 = linear(bias = var_297_to_fp16, weight = var_296_to_fp16, x = var_287_cast_fp16)[name = string("linear_4_cast_fp16")]; tensor concat_18 = const()[name = string("concat_18"), val = tensor([0, 0, 0])]; tensor concat_19 = const()[name = string("concat_19"), val = tensor([0, 1500, 0])]; tensor k_7_internal_tensor_assign_1_stride_0 = const()[name = string("k_7_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_7_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_7_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_7_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_7_to_fp16 = const()[name = string("k_7_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(150733440)))]; tensor k_7_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_18, begin_mask = k_7_internal_tensor_assign_1_begin_mask_0, end = concat_19, end_mask = k_7_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_7_internal_tensor_assign_1_squeeze_mask_0, stride = k_7_internal_tensor_assign_1_stride_0, update = k_cache_3_cast_fp16, x = k_7_to_fp16)[name = string("k_7_internal_tensor_assign_1_cast_fp16")]; tensor concat_20 = const()[name = string("concat_20"), val = tensor([0, 0, 0])]; tensor concat_21 = const()[name = string("concat_21"), val = tensor([0, 1500, 0])]; tensor v_7_internal_tensor_assign_1_stride_0 = const()[name = string("v_7_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_7_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_7_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_7_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_7_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_20, begin_mask = v_7_internal_tensor_assign_1_begin_mask_0, end = concat_21, end_mask = v_7_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_7_internal_tensor_assign_1_squeeze_mask_0, stride = v_7_internal_tensor_assign_1_stride_0, update = v_cache_3_cast_fp16, x = k_7_to_fp16)[name = string("v_7_internal_tensor_assign_1_cast_fp16")]; tensor concat_22x = const()[name = string("concat_22x"), val = tensor([1, -1, 20, 64])]; tensor var_317_cast_fp16 = reshape(shape = concat_22x, x = linear_4_cast_fp16)[name = string("op_317_cast_fp16")]; tensor const_162_to_fp16 = const()[name = string("const_162_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_7_cast_fp16 = mul(x = var_317_cast_fp16, y = const_162_to_fp16)[name = string("q_7_cast_fp16")]; tensor var_323 = const()[name = string("op_323"), val = tensor([1, 1500, 20, -1])]; tensor var_324_cast_fp16 = reshape(shape = var_323, x = k_7_internal_tensor_assign_1_cast_fp16)[name = string("op_324_cast_fp16")]; tensor const_163_to_fp16 = const()[name = string("const_163_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_9_cast_fp16 = mul(x = var_324_cast_fp16, y = const_163_to_fp16)[name = string("k_9_cast_fp16")]; tensor var_330 = const()[name = string("op_330"), val = tensor([1, 1500, 20, -1])]; tensor var_331_cast_fp16 = reshape(shape = var_330, x = v_7_internal_tensor_assign_1_cast_fp16)[name = string("op_331_cast_fp16")]; tensor var_332 = const()[name = string("op_332"), val = tensor([0, 2, 1, 3])]; bool qk_5_transpose_x_0 = const()[name = string("qk_5_transpose_x_0"), val = bool(false)]; bool qk_5_transpose_y_0 = const()[name = string("qk_5_transpose_y_0"), val = bool(false)]; tensor transpose_259_perm_0 = const()[name = string("transpose_259_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_260_perm_0 = const()[name = string("transpose_260_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_260 = transpose(perm = transpose_260_perm_0, x = k_9_cast_fp16)[name = string("transpose_634")]; tensor transpose_259 = transpose(perm = transpose_259_perm_0, x = q_7_cast_fp16)[name = string("transpose_635")]; tensor qk_5_cast_fp16 = matmul(transpose_x = qk_5_transpose_x_0, transpose_y = qk_5_transpose_y_0, x = transpose_259, y = transpose_260)[name = string("qk_5_cast_fp16")]; tensor var_336_cast_fp16 = softmax(axis = var_180, x = qk_5_cast_fp16)[name = string("op_336_cast_fp16")]; bool var_338_transpose_x_0 = const()[name = string("op_338_transpose_x_0"), val = bool(false)]; bool var_338_transpose_y_0 = const()[name = string("op_338_transpose_y_0"), val = bool(false)]; tensor v_9_cast_fp16 = transpose(perm = var_332, x = var_331_cast_fp16)[name = string("transpose_636")]; tensor var_338_cast_fp16 = matmul(transpose_x = var_338_transpose_x_0, transpose_y = var_338_transpose_y_0, x = var_336_cast_fp16, y = v_9_cast_fp16)[name = string("op_338_cast_fp16")]; tensor var_339 = const()[name = string("op_339"), val = tensor([0, 2, 1, 3])]; tensor concat_23x = const()[name = string("concat_23x"), val = tensor([1, -1, 1280])]; tensor var_340_cast_fp16 = transpose(perm = var_339, x = var_338_cast_fp16)[name = string("transpose_633")]; tensor x_13_cast_fp16 = reshape(shape = concat_23x, x = var_340_cast_fp16)[name = string("x_13_cast_fp16")]; tensor var_344_to_fp16 = const()[name = string("op_344_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154573504)))]; tensor var_345_to_fp16 = const()[name = string("op_345_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157850368)))]; tensor linear_5_cast_fp16 = linear(bias = var_345_to_fp16, weight = var_344_to_fp16, x = x_13_cast_fp16)[name = string("linear_5_cast_fp16")]; tensor x_15_cast_fp16 = add(x = x_9_cast_fp16, y = linear_5_cast_fp16)[name = string("x_15_cast_fp16")]; tensor var_352_axes_0 = const()[name = string("op_352_axes_0"), val = tensor([-1])]; tensor blocks_0_mlp_ln_weight_to_fp16 = const()[name = string("blocks_0_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157852992)))]; tensor blocks_0_mlp_ln_bias_to_fp16 = const()[name = string("blocks_0_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157855616)))]; tensor var_352_cast_fp16 = layer_norm(axes = var_352_axes_0, beta = blocks_0_mlp_ln_bias_to_fp16, epsilon = var_186_to_fp16, gamma = blocks_0_mlp_ln_weight_to_fp16, x = x_15_cast_fp16)[name = string("op_352_cast_fp16")]; tensor var_361_to_fp16 = const()[name = string("op_361_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157858240)))]; tensor var_362_to_fp16 = const()[name = string("op_362_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170965504)))]; tensor linear_6_cast_fp16 = linear(bias = var_362_to_fp16, weight = var_361_to_fp16, x = var_352_cast_fp16)[name = string("linear_6_cast_fp16")]; string x_19_mode_0 = const()[name = string("x_19_mode_0"), val = string("EXACT")]; tensor x_19_cast_fp16 = gelu(mode = x_19_mode_0, x = linear_6_cast_fp16)[name = string("x_19_cast_fp16")]; tensor var_367_to_fp16 = const()[name = string("op_367_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170975808)))]; tensor var_368_to_fp16 = const()[name = string("op_368_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184083072)))]; tensor linear_7_cast_fp16 = linear(bias = var_368_to_fp16, weight = var_367_to_fp16, x = x_19_cast_fp16)[name = string("linear_7_cast_fp16")]; tensor x_21_cast_fp16 = add(x = x_15_cast_fp16, y = linear_7_cast_fp16)[name = string("x_21_cast_fp16")]; tensor k_cache_5_begin_0 = const()[name = string("k_cache_5_begin_0"), val = tensor([1, 0, 0, 0])]; tensor k_cache_5_end_0 = const()[name = string("k_cache_5_end_0"), val = tensor([2, 1, 448, 1280])]; tensor k_cache_5_end_mask_0 = const()[name = string("k_cache_5_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_5_squeeze_mask_0 = const()[name = string("k_cache_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_5_cast_fp16 = slice_by_index(begin = k_cache_5_begin_0, end = k_cache_5_end_0, end_mask = k_cache_5_end_mask_0, squeeze_mask = k_cache_5_squeeze_mask_0, x = coreml_update_state_64)[name = string("k_cache_5_cast_fp16")]; tensor v_cache_5_begin_0 = const()[name = string("v_cache_5_begin_0"), val = tensor([1, 0, 0, 0])]; tensor v_cache_5_end_0 = const()[name = string("v_cache_5_end_0"), val = tensor([2, 1, 448, 1280])]; tensor v_cache_5_end_mask_0 = const()[name = string("v_cache_5_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_5_squeeze_mask_0 = const()[name = string("v_cache_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_5_cast_fp16 = slice_by_index(begin = v_cache_5_begin_0, end = v_cache_5_end_0, end_mask = v_cache_5_end_mask_0, squeeze_mask = v_cache_5_squeeze_mask_0, x = coreml_update_state_65)[name = string("v_cache_5_cast_fp16")]; tensor k_cache_7_begin_0 = const()[name = string("k_cache_7_begin_0"), val = tensor([1, 0, 0, 0])]; tensor k_cache_7_end_0 = const()[name = string("k_cache_7_end_0"), val = tensor([2, 1, 1500, 1280])]; tensor k_cache_7_end_mask_0 = const()[name = string("k_cache_7_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_7_squeeze_mask_0 = const()[name = string("k_cache_7_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_7_cast_fp16 = slice_by_index(begin = k_cache_7_begin_0, end = k_cache_7_end_0, end_mask = k_cache_7_end_mask_0, squeeze_mask = k_cache_7_squeeze_mask_0, x = read_state_2)[name = string("k_cache_7_cast_fp16")]; tensor v_cache_7_begin_0 = const()[name = string("v_cache_7_begin_0"), val = tensor([1, 0, 0, 0])]; tensor v_cache_7_end_0 = const()[name = string("v_cache_7_end_0"), val = tensor([2, 1, 1500, 1280])]; tensor v_cache_7_end_mask_0 = const()[name = string("v_cache_7_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_7_squeeze_mask_0 = const()[name = string("v_cache_7_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_7_cast_fp16 = slice_by_index(begin = v_cache_7_begin_0, end = v_cache_7_end_0, end_mask = v_cache_7_end_mask_0, squeeze_mask = v_cache_7_squeeze_mask_0, x = read_state_3)[name = string("v_cache_7_cast_fp16")]; int32 var_391 = const()[name = string("op_391"), val = int32(-1)]; tensor var_409_axes_0 = const()[name = string("op_409_axes_0"), val = tensor([-1])]; tensor blocks_1_attn_ln_weight_to_fp16 = const()[name = string("blocks_1_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184085696)))]; tensor blocks_1_attn_ln_bias_to_fp16 = const()[name = string("blocks_1_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184088320)))]; fp16 var_397_to_fp16 = const()[name = string("op_397_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_409_cast_fp16 = layer_norm(axes = var_409_axes_0, beta = blocks_1_attn_ln_bias_to_fp16, epsilon = var_397_to_fp16, gamma = blocks_1_attn_ln_weight_to_fp16, x = x_21_cast_fp16)[name = string("op_409_cast_fp16")]; tensor var_420_to_fp16 = const()[name = string("op_420_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184090944)))]; tensor var_421_to_fp16 = const()[name = string("op_421_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187367808)))]; tensor linear_8_cast_fp16 = linear(bias = var_421_to_fp16, weight = var_420_to_fp16, x = var_409_cast_fp16)[name = string("linear_8_cast_fp16")]; tensor var_424_to_fp16 = const()[name = string("op_424_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187370432)))]; tensor linear_9_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_424_to_fp16, x = var_409_cast_fp16)[name = string("linear_9_cast_fp16")]; tensor var_428_to_fp16 = const()[name = string("op_428_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190647296)))]; tensor var_429_to_fp16 = const()[name = string("op_429_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193924160)))]; tensor linear_10_cast_fp16 = linear(bias = var_429_to_fp16, weight = var_428_to_fp16, x = var_409_cast_fp16)[name = string("linear_10_cast_fp16")]; tensor var_431_shape_cast_fp16 = shape(x = linear_8_cast_fp16)[name = string("op_431_shape_cast_fp16")]; int32 gather_14_axis_0 = const()[name = string("gather_14_axis_0"), val = int32(0)]; int32 gather_14_batch_dims_0 = const()[name = string("gather_14_batch_dims_0"), val = int32(0)]; bool gather_14_validate_indices_0 = const()[name = string("gather_14_validate_indices_0"), val = bool(false)]; string var_431_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_431_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_14_to_uint16 = const()[name = string("select_14_to_uint16"), val = uint16(1)]; tensor var_431_shape_cast_fp16_to_uint16 = cast(dtype = var_431_shape_cast_fp16_to_uint16_dtype_0, x = var_431_shape_cast_fp16)[name = string("cast_388")]; uint16 gather_14_cast_uint16 = gather(axis = gather_14_axis_0, batch_dims = gather_14_batch_dims_0, indices = select_14_to_uint16, validate_indices = gather_14_validate_indices_0, x = var_431_shape_cast_fp16_to_uint16)[name = string("gather_14_cast_uint16")]; string gather_14_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_14_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_14_cast_uint16_to_int32 = cast(dtype = gather_14_cast_uint16_to_int32_dtype_0, x = gather_14_cast_uint16)[name = string("cast_387")]; int32 end_step_5 = add(x = offset, y = gather_14_cast_uint16_to_int32)[name = string("end_step_5")]; tensor expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor([0])]; tensor expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor([0])]; tensor expand_dims_19_axes_0 = const()[name = string("expand_dims_19_axes_0"), val = tensor([0])]; tensor expand_dims_19 = expand_dims(axes = expand_dims_19_axes_0, x = end_step_5)[name = string("expand_dims_19")]; tensor concat_26_values0_0 = const()[name = string("concat_26_values0_0"), val = tensor([1])]; int32 concat_26_axis_0 = const()[name = string("concat_26_axis_0"), val = int32(0)]; bool concat_26_interleave_0 = const()[name = string("concat_26_interleave_0"), val = bool(false)]; tensor concat_26 = concat(axis = concat_26_axis_0, interleave = concat_26_interleave_0, values = (concat_26_values0_0, expand_dims_16, expand_dims_1, expand_dims_18))[name = string("concat_26")]; tensor concat_27_values0_0 = const()[name = string("concat_27_values0_0"), val = tensor([0])]; tensor concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor([0])]; tensor concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor([0])]; int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)]; bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)]; tensor concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (concat_27_values0_0, concat_27_values1_0, expand_dims_19, concat_27_values3_0))[name = string("concat_27")]; tensor k_cache1_internal_tensor_assign_2_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_2_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_2_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_26, begin_mask = k_cache1_internal_tensor_assign_2_begin_mask_0, end = concat_27, end_mask = k_cache1_internal_tensor_assign_2_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_2_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_2_stride_0, update = linear_9_cast_fp16, x = coreml_update_state_64)[name = string("k_cache1_internal_tensor_assign_2_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_2_cast_fp16, input = k_cache1)[name = string("coreml_update_state_66_write_state")]; tensor coreml_update_state_66 = read_state(input = k_cache1)[name = string("coreml_update_state_66")]; tensor v_cache1_internal_tensor_assign_2_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_2_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_2_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_26, begin_mask = v_cache1_internal_tensor_assign_2_begin_mask_0, end = concat_27, end_mask = v_cache1_internal_tensor_assign_2_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_2_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_2_stride_0, update = linear_10_cast_fp16, x = coreml_update_state_65)[name = string("v_cache1_internal_tensor_assign_2_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_2_cast_fp16, input = v_cache1)[name = string("coreml_update_state_67_write_state")]; tensor coreml_update_state_67 = read_state(input = v_cache1)[name = string("coreml_update_state_67")]; int32 concat_32_values0_0 = const()[name = string("concat_32_values0_0"), val = int32(1)]; int32 concat_32_values2_0 = const()[name = string("concat_32_values2_0"), val = int32(1280)]; int32 concat_32_axis_0 = const()[name = string("concat_32_axis_0"), val = int32(0)]; bool concat_32_interleave_0 = const()[name = string("concat_32_interleave_0"), val = bool(false)]; tensor concat_32 = concat(axis = concat_32_axis_0, interleave = concat_32_interleave_0, values = (concat_32_values0_0, end_step_5, concat_32_values2_0))[name = string("concat_32")]; tensor var_447_begin_0 = const()[name = string("op_447_begin_0"), val = tensor([0, 0, 0])]; tensor var_447_end_mask_0 = const()[name = string("op_447_end_mask_0"), val = tensor([true, false, true])]; tensor var_447_cast_fp16 = slice_by_index(begin = var_447_begin_0, end = concat_32, end_mask = var_447_end_mask_0, x = k_cache_5_cast_fp16)[name = string("op_447_cast_fp16")]; tensor var_450_begin_0 = const()[name = string("op_450_begin_0"), val = tensor([0, 0, 0])]; tensor var_450_end_mask_0 = const()[name = string("op_450_end_mask_0"), val = tensor([true, false, true])]; tensor var_450_cast_fp16 = slice_by_index(begin = var_450_begin_0, end = concat_32, end_mask = var_450_end_mask_0, x = v_cache_5_cast_fp16)[name = string("op_450_cast_fp16")]; tensor concat_34x = const()[name = string("concat_34x"), val = tensor([1, -1, 20, 64])]; tensor var_460_cast_fp16 = reshape(shape = concat_34x, x = linear_8_cast_fp16)[name = string("op_460_cast_fp16")]; tensor const_164_to_fp16 = const()[name = string("const_164_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_11_cast_fp16 = mul(x = var_460_cast_fp16, y = const_164_to_fp16)[name = string("q_11_cast_fp16")]; tensor concat_35x = const()[name = string("concat_35x"), val = tensor([1, -1, 20, 64])]; tensor var_467_cast_fp16 = reshape(shape = concat_35x, x = var_447_cast_fp16)[name = string("op_467_cast_fp16")]; tensor const_165_to_fp16 = const()[name = string("const_165_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_15_cast_fp16 = mul(x = var_467_cast_fp16, y = const_165_to_fp16)[name = string("k_15_cast_fp16")]; tensor concat_36x = const()[name = string("concat_36x"), val = tensor([1, -1, 20, 64])]; tensor var_474_cast_fp16 = reshape(shape = concat_36x, x = var_450_cast_fp16)[name = string("op_474_cast_fp16")]; tensor var_475 = const()[name = string("op_475"), val = tensor([0, 2, 1, 3])]; bool qk_7_transpose_x_0 = const()[name = string("qk_7_transpose_x_0"), val = bool(false)]; bool qk_7_transpose_y_0 = const()[name = string("qk_7_transpose_y_0"), val = bool(false)]; tensor transpose_261_perm_0 = const()[name = string("transpose_261_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_262_perm_0 = const()[name = string("transpose_262_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_262 = transpose(perm = transpose_262_perm_0, x = k_15_cast_fp16)[name = string("transpose_630")]; tensor transpose_261 = transpose(perm = transpose_261_perm_0, x = q_11_cast_fp16)[name = string("transpose_631")]; tensor qk_7_cast_fp16 = matmul(transpose_x = qk_7_transpose_x_0, transpose_y = qk_7_transpose_y_0, x = transpose_261, y = transpose_262)[name = string("qk_7_cast_fp16")]; int32 concat_37_values1_0 = const()[name = string("concat_37_values1_0"), val = int32(448)]; int32 concat_37_axis_0 = const()[name = string("concat_37_axis_0"), val = int32(0)]; bool concat_37_interleave_0 = const()[name = string("concat_37_interleave_0"), val = bool(false)]; tensor concat_37 = concat(axis = concat_37_axis_0, interleave = concat_37_interleave_0, values = (gather_14_cast_uint16_to_int32, concat_37_values1_0))[name = string("concat_37")]; tensor var_478_begin_0 = const()[name = string("op_478_begin_0"), val = tensor([0, 0])]; tensor var_478_end_mask_0 = const()[name = string("op_478_end_mask_0"), val = tensor([false, true])]; tensor var_478_cast_fp16 = slice_by_index(begin = var_478_begin_0, end = concat_37, end_mask = var_478_end_mask_0, x = mask_to_fp16)[name = string("op_478_cast_fp16")]; int32 concat_38_values0_0 = const()[name = string("concat_38_values0_0"), val = int32(0)]; int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)]; bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)]; tensor concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (concat_38_values0_0, gather_14_cast_uint16_to_int32))[name = string("concat_38")]; tensor var_479_begin_0 = const()[name = string("op_479_begin_0"), val = tensor([0, 0])]; tensor var_479_end_mask_0 = const()[name = string("op_479_end_mask_0"), val = tensor([true, false])]; tensor var_479_cast_fp16 = slice_by_index(begin = var_479_begin_0, end = concat_38, end_mask = var_479_end_mask_0, x = var_478_cast_fp16)[name = string("op_479_cast_fp16")]; tensor qk_9_cast_fp16 = add(x = qk_7_cast_fp16, y = var_479_cast_fp16)[name = string("qk_9_cast_fp16")]; tensor var_482_cast_fp16 = softmax(axis = var_391, x = qk_9_cast_fp16)[name = string("op_482_cast_fp16")]; bool var_484_transpose_x_0 = const()[name = string("op_484_transpose_x_0"), val = bool(false)]; bool var_484_transpose_y_0 = const()[name = string("op_484_transpose_y_0"), val = bool(false)]; tensor v_15_cast_fp16 = transpose(perm = var_475, x = var_474_cast_fp16)[name = string("transpose_632")]; tensor var_484_cast_fp16 = matmul(transpose_x = var_484_transpose_x_0, transpose_y = var_484_transpose_y_0, x = var_482_cast_fp16, y = v_15_cast_fp16)[name = string("op_484_cast_fp16")]; tensor var_485 = const()[name = string("op_485"), val = tensor([0, 2, 1, 3])]; tensor concat_39x = const()[name = string("concat_39x"), val = tensor([1, -1, 1280])]; tensor var_486_cast_fp16 = transpose(perm = var_485, x = var_484_cast_fp16)[name = string("transpose_629")]; tensor x_25_cast_fp16 = reshape(shape = concat_39x, x = var_486_cast_fp16)[name = string("x_25_cast_fp16")]; tensor var_490_to_fp16 = const()[name = string("op_490_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193926784)))]; tensor var_491_to_fp16 = const()[name = string("op_491_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(197203648)))]; tensor linear_11_cast_fp16 = linear(bias = var_491_to_fp16, weight = var_490_to_fp16, x = x_25_cast_fp16)[name = string("linear_11_cast_fp16")]; tensor x_27_cast_fp16 = add(x = x_21_cast_fp16, y = linear_11_cast_fp16)[name = string("x_27_cast_fp16")]; tensor var_498_axes_0 = const()[name = string("op_498_axes_0"), val = tensor([-1])]; tensor blocks_1_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_1_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(197206272)))]; tensor blocks_1_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_1_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(197208896)))]; tensor var_498_cast_fp16 = layer_norm(axes = var_498_axes_0, beta = blocks_1_cross_attn_ln_bias_to_fp16, epsilon = var_397_to_fp16, gamma = blocks_1_cross_attn_ln_weight_to_fp16, x = x_27_cast_fp16)[name = string("op_498_cast_fp16")]; tensor var_507_to_fp16 = const()[name = string("op_507_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(197211520)))]; tensor var_508_to_fp16 = const()[name = string("op_508_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200488384)))]; tensor linear_12_cast_fp16 = linear(bias = var_508_to_fp16, weight = var_507_to_fp16, x = var_498_cast_fp16)[name = string("linear_12_cast_fp16")]; tensor concat_40 = const()[name = string("concat_40"), val = tensor([0, 0, 0])]; tensor concat_41 = const()[name = string("concat_41"), val = tensor([0, 1500, 0])]; tensor k_17_internal_tensor_assign_1_stride_0 = const()[name = string("k_17_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_17_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_17_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_17_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_17_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_40, begin_mask = k_17_internal_tensor_assign_1_begin_mask_0, end = concat_41, end_mask = k_17_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_17_internal_tensor_assign_1_squeeze_mask_0, stride = k_17_internal_tensor_assign_1_stride_0, update = k_cache_7_cast_fp16, x = k_7_to_fp16)[name = string("k_17_internal_tensor_assign_1_cast_fp16")]; tensor concat_42 = const()[name = string("concat_42"), val = tensor([0, 0, 0])]; tensor concat_43 = const()[name = string("concat_43"), val = tensor([0, 1500, 0])]; tensor v_17_internal_tensor_assign_1_stride_0 = const()[name = string("v_17_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_17_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_17_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_17_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_17_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_42, begin_mask = v_17_internal_tensor_assign_1_begin_mask_0, end = concat_43, end_mask = v_17_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_17_internal_tensor_assign_1_squeeze_mask_0, stride = v_17_internal_tensor_assign_1_stride_0, update = v_cache_7_cast_fp16, x = k_7_to_fp16)[name = string("v_17_internal_tensor_assign_1_cast_fp16")]; tensor concat_44x = const()[name = string("concat_44x"), val = tensor([1, -1, 20, 64])]; tensor var_528_cast_fp16 = reshape(shape = concat_44x, x = linear_12_cast_fp16)[name = string("op_528_cast_fp16")]; tensor const_166_to_fp16 = const()[name = string("const_166_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_15_cast_fp16 = mul(x = var_528_cast_fp16, y = const_166_to_fp16)[name = string("q_15_cast_fp16")]; tensor var_534 = const()[name = string("op_534"), val = tensor([1, 1500, 20, -1])]; tensor var_535_cast_fp16 = reshape(shape = var_534, x = k_17_internal_tensor_assign_1_cast_fp16)[name = string("op_535_cast_fp16")]; tensor const_167_to_fp16 = const()[name = string("const_167_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_19_cast_fp16 = mul(x = var_535_cast_fp16, y = const_167_to_fp16)[name = string("k_19_cast_fp16")]; tensor var_541 = const()[name = string("op_541"), val = tensor([1, 1500, 20, -1])]; tensor var_542_cast_fp16 = reshape(shape = var_541, x = v_17_internal_tensor_assign_1_cast_fp16)[name = string("op_542_cast_fp16")]; tensor var_543 = const()[name = string("op_543"), val = tensor([0, 2, 1, 3])]; bool qk_11_transpose_x_0 = const()[name = string("qk_11_transpose_x_0"), val = bool(false)]; bool qk_11_transpose_y_0 = const()[name = string("qk_11_transpose_y_0"), val = bool(false)]; tensor transpose_263_perm_0 = const()[name = string("transpose_263_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_264_perm_0 = const()[name = string("transpose_264_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_264 = transpose(perm = transpose_264_perm_0, x = k_19_cast_fp16)[name = string("transpose_626")]; tensor transpose_263 = transpose(perm = transpose_263_perm_0, x = q_15_cast_fp16)[name = string("transpose_627")]; tensor qk_11_cast_fp16 = matmul(transpose_x = qk_11_transpose_x_0, transpose_y = qk_11_transpose_y_0, x = transpose_263, y = transpose_264)[name = string("qk_11_cast_fp16")]; tensor var_547_cast_fp16 = softmax(axis = var_391, x = qk_11_cast_fp16)[name = string("op_547_cast_fp16")]; bool var_549_transpose_x_0 = const()[name = string("op_549_transpose_x_0"), val = bool(false)]; bool var_549_transpose_y_0 = const()[name = string("op_549_transpose_y_0"), val = bool(false)]; tensor v_19_cast_fp16 = transpose(perm = var_543, x = var_542_cast_fp16)[name = string("transpose_628")]; tensor var_549_cast_fp16 = matmul(transpose_x = var_549_transpose_x_0, transpose_y = var_549_transpose_y_0, x = var_547_cast_fp16, y = v_19_cast_fp16)[name = string("op_549_cast_fp16")]; tensor var_550 = const()[name = string("op_550"), val = tensor([0, 2, 1, 3])]; tensor concat_45x = const()[name = string("concat_45x"), val = tensor([1, -1, 1280])]; tensor var_551_cast_fp16 = transpose(perm = var_550, x = var_549_cast_fp16)[name = string("transpose_625")]; tensor x_31_cast_fp16 = reshape(shape = concat_45x, x = var_551_cast_fp16)[name = string("x_31_cast_fp16")]; tensor var_555_to_fp16 = const()[name = string("op_555_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200491008)))]; tensor var_556_to_fp16 = const()[name = string("op_556_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203767872)))]; tensor linear_13_cast_fp16 = linear(bias = var_556_to_fp16, weight = var_555_to_fp16, x = x_31_cast_fp16)[name = string("linear_13_cast_fp16")]; tensor x_33_cast_fp16 = add(x = x_27_cast_fp16, y = linear_13_cast_fp16)[name = string("x_33_cast_fp16")]; tensor var_563_axes_0 = const()[name = string("op_563_axes_0"), val = tensor([-1])]; tensor blocks_1_mlp_ln_weight_to_fp16 = const()[name = string("blocks_1_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203770496)))]; tensor blocks_1_mlp_ln_bias_to_fp16 = const()[name = string("blocks_1_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203773120)))]; tensor var_563_cast_fp16 = layer_norm(axes = var_563_axes_0, beta = blocks_1_mlp_ln_bias_to_fp16, epsilon = var_397_to_fp16, gamma = blocks_1_mlp_ln_weight_to_fp16, x = x_33_cast_fp16)[name = string("op_563_cast_fp16")]; tensor var_572_to_fp16 = const()[name = string("op_572_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203775744)))]; tensor var_573_to_fp16 = const()[name = string("op_573_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216883008)))]; tensor linear_14_cast_fp16 = linear(bias = var_573_to_fp16, weight = var_572_to_fp16, x = var_563_cast_fp16)[name = string("linear_14_cast_fp16")]; string x_37_mode_0 = const()[name = string("x_37_mode_0"), val = string("EXACT")]; tensor x_37_cast_fp16 = gelu(mode = x_37_mode_0, x = linear_14_cast_fp16)[name = string("x_37_cast_fp16")]; tensor var_578_to_fp16 = const()[name = string("op_578_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216893312)))]; tensor var_579_to_fp16 = const()[name = string("op_579_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(230000576)))]; tensor linear_15_cast_fp16 = linear(bias = var_579_to_fp16, weight = var_578_to_fp16, x = x_37_cast_fp16)[name = string("linear_15_cast_fp16")]; tensor x_39_cast_fp16 = add(x = x_33_cast_fp16, y = linear_15_cast_fp16)[name = string("x_39_cast_fp16")]; tensor k_cache_9_begin_0 = const()[name = string("k_cache_9_begin_0"), val = tensor([2, 0, 0, 0])]; tensor k_cache_9_end_0 = const()[name = string("k_cache_9_end_0"), val = tensor([3, 1, 448, 1280])]; tensor k_cache_9_end_mask_0 = const()[name = string("k_cache_9_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_9_squeeze_mask_0 = const()[name = string("k_cache_9_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_9_cast_fp16 = slice_by_index(begin = k_cache_9_begin_0, end = k_cache_9_end_0, end_mask = k_cache_9_end_mask_0, squeeze_mask = k_cache_9_squeeze_mask_0, x = coreml_update_state_66)[name = string("k_cache_9_cast_fp16")]; tensor v_cache_9_begin_0 = const()[name = string("v_cache_9_begin_0"), val = tensor([2, 0, 0, 0])]; tensor v_cache_9_end_0 = const()[name = string("v_cache_9_end_0"), val = tensor([3, 1, 448, 1280])]; tensor v_cache_9_end_mask_0 = const()[name = string("v_cache_9_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_9_squeeze_mask_0 = const()[name = string("v_cache_9_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_9_cast_fp16 = slice_by_index(begin = v_cache_9_begin_0, end = v_cache_9_end_0, end_mask = v_cache_9_end_mask_0, squeeze_mask = v_cache_9_squeeze_mask_0, x = coreml_update_state_67)[name = string("v_cache_9_cast_fp16")]; tensor k_cache_11_begin_0 = const()[name = string("k_cache_11_begin_0"), val = tensor([2, 0, 0, 0])]; tensor k_cache_11_end_0 = const()[name = string("k_cache_11_end_0"), val = tensor([3, 1, 1500, 1280])]; tensor k_cache_11_end_mask_0 = const()[name = string("k_cache_11_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_11_squeeze_mask_0 = const()[name = string("k_cache_11_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_11_cast_fp16 = slice_by_index(begin = k_cache_11_begin_0, end = k_cache_11_end_0, end_mask = k_cache_11_end_mask_0, squeeze_mask = k_cache_11_squeeze_mask_0, x = read_state_2)[name = string("k_cache_11_cast_fp16")]; tensor v_cache_11_begin_0 = const()[name = string("v_cache_11_begin_0"), val = tensor([2, 0, 0, 0])]; tensor v_cache_11_end_0 = const()[name = string("v_cache_11_end_0"), val = tensor([3, 1, 1500, 1280])]; tensor v_cache_11_end_mask_0 = const()[name = string("v_cache_11_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_11_squeeze_mask_0 = const()[name = string("v_cache_11_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_11_cast_fp16 = slice_by_index(begin = v_cache_11_begin_0, end = v_cache_11_end_0, end_mask = v_cache_11_end_mask_0, squeeze_mask = v_cache_11_squeeze_mask_0, x = read_state_3)[name = string("v_cache_11_cast_fp16")]; int32 var_602 = const()[name = string("op_602"), val = int32(-1)]; tensor var_620_axes_0 = const()[name = string("op_620_axes_0"), val = tensor([-1])]; tensor blocks_2_attn_ln_weight_to_fp16 = const()[name = string("blocks_2_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(230003200)))]; tensor blocks_2_attn_ln_bias_to_fp16 = const()[name = string("blocks_2_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(230005824)))]; fp16 var_608_to_fp16 = const()[name = string("op_608_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_620_cast_fp16 = layer_norm(axes = var_620_axes_0, beta = blocks_2_attn_ln_bias_to_fp16, epsilon = var_608_to_fp16, gamma = blocks_2_attn_ln_weight_to_fp16, x = x_39_cast_fp16)[name = string("op_620_cast_fp16")]; tensor var_631_to_fp16 = const()[name = string("op_631_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(230008448)))]; tensor var_632_to_fp16 = const()[name = string("op_632_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233285312)))]; tensor linear_16_cast_fp16 = linear(bias = var_632_to_fp16, weight = var_631_to_fp16, x = var_620_cast_fp16)[name = string("linear_16_cast_fp16")]; tensor var_635_to_fp16 = const()[name = string("op_635_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233287936)))]; tensor linear_17_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_635_to_fp16, x = var_620_cast_fp16)[name = string("linear_17_cast_fp16")]; tensor var_639_to_fp16 = const()[name = string("op_639_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236564800)))]; tensor var_640_to_fp16 = const()[name = string("op_640_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239841664)))]; tensor linear_18_cast_fp16 = linear(bias = var_640_to_fp16, weight = var_639_to_fp16, x = var_620_cast_fp16)[name = string("linear_18_cast_fp16")]; tensor var_642_shape_cast_fp16 = shape(x = linear_16_cast_fp16)[name = string("op_642_shape_cast_fp16")]; int32 gather_26_axis_0 = const()[name = string("gather_26_axis_0"), val = int32(0)]; int32 gather_26_batch_dims_0 = const()[name = string("gather_26_batch_dims_0"), val = int32(0)]; bool gather_26_validate_indices_0 = const()[name = string("gather_26_validate_indices_0"), val = bool(false)]; string var_642_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_642_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_26_to_uint16 = const()[name = string("select_26_to_uint16"), val = uint16(1)]; tensor var_642_shape_cast_fp16_to_uint16 = cast(dtype = var_642_shape_cast_fp16_to_uint16_dtype_0, x = var_642_shape_cast_fp16)[name = string("cast_386")]; uint16 gather_26_cast_uint16 = gather(axis = gather_26_axis_0, batch_dims = gather_26_batch_dims_0, indices = select_26_to_uint16, validate_indices = gather_26_validate_indices_0, x = var_642_shape_cast_fp16_to_uint16)[name = string("gather_26_cast_uint16")]; string gather_26_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_26_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_26_cast_uint16_to_int32 = cast(dtype = gather_26_cast_uint16_to_int32_dtype_0, x = gather_26_cast_uint16)[name = string("cast_385")]; int32 end_step_7 = add(x = offset, y = gather_26_cast_uint16_to_int32)[name = string("end_step_7")]; tensor expand_dims_32 = const()[name = string("expand_dims_32"), val = tensor([0])]; tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([0])]; tensor expand_dims_35_axes_0 = const()[name = string("expand_dims_35_axes_0"), val = tensor([0])]; tensor expand_dims_35 = expand_dims(axes = expand_dims_35_axes_0, x = end_step_7)[name = string("expand_dims_35")]; tensor concat_48_values0_0 = const()[name = string("concat_48_values0_0"), val = tensor([2])]; int32 concat_48_axis_0 = const()[name = string("concat_48_axis_0"), val = int32(0)]; bool concat_48_interleave_0 = const()[name = string("concat_48_interleave_0"), val = bool(false)]; tensor concat_48 = concat(axis = concat_48_axis_0, interleave = concat_48_interleave_0, values = (concat_48_values0_0, expand_dims_32, expand_dims_1, expand_dims_34))[name = string("concat_48")]; tensor concat_49_values0_0 = const()[name = string("concat_49_values0_0"), val = tensor([0])]; tensor concat_49_values1_0 = const()[name = string("concat_49_values1_0"), val = tensor([0])]; tensor concat_49_values3_0 = const()[name = string("concat_49_values3_0"), val = tensor([0])]; int32 concat_49_axis_0 = const()[name = string("concat_49_axis_0"), val = int32(0)]; bool concat_49_interleave_0 = const()[name = string("concat_49_interleave_0"), val = bool(false)]; tensor concat_49 = concat(axis = concat_49_axis_0, interleave = concat_49_interleave_0, values = (concat_49_values0_0, concat_49_values1_0, expand_dims_35, concat_49_values3_0))[name = string("concat_49")]; tensor k_cache1_internal_tensor_assign_3_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_3_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_3_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_48, begin_mask = k_cache1_internal_tensor_assign_3_begin_mask_0, end = concat_49, end_mask = k_cache1_internal_tensor_assign_3_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_3_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_3_stride_0, update = linear_17_cast_fp16, x = coreml_update_state_66)[name = string("k_cache1_internal_tensor_assign_3_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_3_cast_fp16, input = k_cache1)[name = string("coreml_update_state_68_write_state")]; tensor coreml_update_state_68 = read_state(input = k_cache1)[name = string("coreml_update_state_68")]; tensor v_cache1_internal_tensor_assign_3_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_3_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_3_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_48, begin_mask = v_cache1_internal_tensor_assign_3_begin_mask_0, end = concat_49, end_mask = v_cache1_internal_tensor_assign_3_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_3_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_3_stride_0, update = linear_18_cast_fp16, x = coreml_update_state_67)[name = string("v_cache1_internal_tensor_assign_3_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_3_cast_fp16, input = v_cache1)[name = string("coreml_update_state_69_write_state")]; tensor coreml_update_state_69 = read_state(input = v_cache1)[name = string("coreml_update_state_69")]; int32 concat_54_values0_0 = const()[name = string("concat_54_values0_0"), val = int32(1)]; int32 concat_54_values2_0 = const()[name = string("concat_54_values2_0"), val = int32(1280)]; int32 concat_54_axis_0 = const()[name = string("concat_54_axis_0"), val = int32(0)]; bool concat_54_interleave_0 = const()[name = string("concat_54_interleave_0"), val = bool(false)]; tensor concat_54 = concat(axis = concat_54_axis_0, interleave = concat_54_interleave_0, values = (concat_54_values0_0, end_step_7, concat_54_values2_0))[name = string("concat_54")]; tensor var_658_begin_0 = const()[name = string("op_658_begin_0"), val = tensor([0, 0, 0])]; tensor var_658_end_mask_0 = const()[name = string("op_658_end_mask_0"), val = tensor([true, false, true])]; tensor var_658_cast_fp16 = slice_by_index(begin = var_658_begin_0, end = concat_54, end_mask = var_658_end_mask_0, x = k_cache_9_cast_fp16)[name = string("op_658_cast_fp16")]; tensor var_661_begin_0 = const()[name = string("op_661_begin_0"), val = tensor([0, 0, 0])]; tensor var_661_end_mask_0 = const()[name = string("op_661_end_mask_0"), val = tensor([true, false, true])]; tensor var_661_cast_fp16 = slice_by_index(begin = var_661_begin_0, end = concat_54, end_mask = var_661_end_mask_0, x = v_cache_9_cast_fp16)[name = string("op_661_cast_fp16")]; tensor concat_56x = const()[name = string("concat_56x"), val = tensor([1, -1, 20, 64])]; tensor var_671_cast_fp16 = reshape(shape = concat_56x, x = linear_16_cast_fp16)[name = string("op_671_cast_fp16")]; tensor const_168_to_fp16 = const()[name = string("const_168_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_19_cast_fp16 = mul(x = var_671_cast_fp16, y = const_168_to_fp16)[name = string("q_19_cast_fp16")]; tensor concat_57x = const()[name = string("concat_57x"), val = tensor([1, -1, 20, 64])]; tensor var_678_cast_fp16 = reshape(shape = concat_57x, x = var_658_cast_fp16)[name = string("op_678_cast_fp16")]; tensor const_169_to_fp16 = const()[name = string("const_169_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_25_cast_fp16 = mul(x = var_678_cast_fp16, y = const_169_to_fp16)[name = string("k_25_cast_fp16")]; tensor concat_58x = const()[name = string("concat_58x"), val = tensor([1, -1, 20, 64])]; tensor var_685_cast_fp16 = reshape(shape = concat_58x, x = var_661_cast_fp16)[name = string("op_685_cast_fp16")]; tensor var_686 = const()[name = string("op_686"), val = tensor([0, 2, 1, 3])]; bool qk_13_transpose_x_0 = const()[name = string("qk_13_transpose_x_0"), val = bool(false)]; bool qk_13_transpose_y_0 = const()[name = string("qk_13_transpose_y_0"), val = bool(false)]; tensor transpose_265_perm_0 = const()[name = string("transpose_265_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_266_perm_0 = const()[name = string("transpose_266_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_266 = transpose(perm = transpose_266_perm_0, x = k_25_cast_fp16)[name = string("transpose_622")]; tensor transpose_265 = transpose(perm = transpose_265_perm_0, x = q_19_cast_fp16)[name = string("transpose_623")]; tensor qk_13_cast_fp16 = matmul(transpose_x = qk_13_transpose_x_0, transpose_y = qk_13_transpose_y_0, x = transpose_265, y = transpose_266)[name = string("qk_13_cast_fp16")]; int32 concat_59_values1_0 = const()[name = string("concat_59_values1_0"), val = int32(448)]; int32 concat_59_axis_0 = const()[name = string("concat_59_axis_0"), val = int32(0)]; bool concat_59_interleave_0 = const()[name = string("concat_59_interleave_0"), val = bool(false)]; tensor concat_59 = concat(axis = concat_59_axis_0, interleave = concat_59_interleave_0, values = (gather_26_cast_uint16_to_int32, concat_59_values1_0))[name = string("concat_59")]; tensor var_689_begin_0 = const()[name = string("op_689_begin_0"), val = tensor([0, 0])]; tensor var_689_end_mask_0 = const()[name = string("op_689_end_mask_0"), val = tensor([false, true])]; tensor var_689_cast_fp16 = slice_by_index(begin = var_689_begin_0, end = concat_59, end_mask = var_689_end_mask_0, x = mask_to_fp16)[name = string("op_689_cast_fp16")]; int32 concat_60_values0_0 = const()[name = string("concat_60_values0_0"), val = int32(0)]; int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)]; bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)]; tensor concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (concat_60_values0_0, gather_26_cast_uint16_to_int32))[name = string("concat_60")]; tensor var_690_begin_0 = const()[name = string("op_690_begin_0"), val = tensor([0, 0])]; tensor var_690_end_mask_0 = const()[name = string("op_690_end_mask_0"), val = tensor([true, false])]; tensor var_690_cast_fp16 = slice_by_index(begin = var_690_begin_0, end = concat_60, end_mask = var_690_end_mask_0, x = var_689_cast_fp16)[name = string("op_690_cast_fp16")]; tensor qk_15_cast_fp16 = add(x = qk_13_cast_fp16, y = var_690_cast_fp16)[name = string("qk_15_cast_fp16")]; tensor var_693_cast_fp16 = softmax(axis = var_602, x = qk_15_cast_fp16)[name = string("op_693_cast_fp16")]; bool var_695_transpose_x_0 = const()[name = string("op_695_transpose_x_0"), val = bool(false)]; bool var_695_transpose_y_0 = const()[name = string("op_695_transpose_y_0"), val = bool(false)]; tensor v_25_cast_fp16 = transpose(perm = var_686, x = var_685_cast_fp16)[name = string("transpose_624")]; tensor var_695_cast_fp16 = matmul(transpose_x = var_695_transpose_x_0, transpose_y = var_695_transpose_y_0, x = var_693_cast_fp16, y = v_25_cast_fp16)[name = string("op_695_cast_fp16")]; tensor var_696 = const()[name = string("op_696"), val = tensor([0, 2, 1, 3])]; tensor concat_61x = const()[name = string("concat_61x"), val = tensor([1, -1, 1280])]; tensor var_697_cast_fp16 = transpose(perm = var_696, x = var_695_cast_fp16)[name = string("transpose_621")]; tensor x_43_cast_fp16 = reshape(shape = concat_61x, x = var_697_cast_fp16)[name = string("x_43_cast_fp16")]; tensor var_701_to_fp16 = const()[name = string("op_701_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239844288)))]; tensor var_702_to_fp16 = const()[name = string("op_702_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243121152)))]; tensor linear_19_cast_fp16 = linear(bias = var_702_to_fp16, weight = var_701_to_fp16, x = x_43_cast_fp16)[name = string("linear_19_cast_fp16")]; tensor x_45_cast_fp16 = add(x = x_39_cast_fp16, y = linear_19_cast_fp16)[name = string("x_45_cast_fp16")]; tensor var_709_axes_0 = const()[name = string("op_709_axes_0"), val = tensor([-1])]; tensor blocks_2_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_2_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243123776)))]; tensor blocks_2_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_2_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243126400)))]; tensor var_709_cast_fp16 = layer_norm(axes = var_709_axes_0, beta = blocks_2_cross_attn_ln_bias_to_fp16, epsilon = var_608_to_fp16, gamma = blocks_2_cross_attn_ln_weight_to_fp16, x = x_45_cast_fp16)[name = string("op_709_cast_fp16")]; tensor var_718_to_fp16 = const()[name = string("op_718_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243129024)))]; tensor var_719_to_fp16 = const()[name = string("op_719_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(246405888)))]; tensor linear_20_cast_fp16 = linear(bias = var_719_to_fp16, weight = var_718_to_fp16, x = var_709_cast_fp16)[name = string("linear_20_cast_fp16")]; tensor concat_62 = const()[name = string("concat_62"), val = tensor([0, 0, 0])]; tensor concat_63 = const()[name = string("concat_63"), val = tensor([0, 1500, 0])]; tensor k_27_internal_tensor_assign_1_stride_0 = const()[name = string("k_27_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_27_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_27_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_27_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_27_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_62, begin_mask = k_27_internal_tensor_assign_1_begin_mask_0, end = concat_63, end_mask = k_27_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_27_internal_tensor_assign_1_squeeze_mask_0, stride = k_27_internal_tensor_assign_1_stride_0, update = k_cache_11_cast_fp16, x = k_7_to_fp16)[name = string("k_27_internal_tensor_assign_1_cast_fp16")]; tensor concat_64 = const()[name = string("concat_64"), val = tensor([0, 0, 0])]; tensor concat_65 = const()[name = string("concat_65"), val = tensor([0, 1500, 0])]; tensor v_27_internal_tensor_assign_1_stride_0 = const()[name = string("v_27_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_27_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_27_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_27_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_27_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_64, begin_mask = v_27_internal_tensor_assign_1_begin_mask_0, end = concat_65, end_mask = v_27_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_27_internal_tensor_assign_1_squeeze_mask_0, stride = v_27_internal_tensor_assign_1_stride_0, update = v_cache_11_cast_fp16, x = k_7_to_fp16)[name = string("v_27_internal_tensor_assign_1_cast_fp16")]; tensor concat_66x = const()[name = string("concat_66x"), val = tensor([1, -1, 20, 64])]; tensor var_739_cast_fp16 = reshape(shape = concat_66x, x = linear_20_cast_fp16)[name = string("op_739_cast_fp16")]; tensor const_170_to_fp16 = const()[name = string("const_170_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_23_cast_fp16 = mul(x = var_739_cast_fp16, y = const_170_to_fp16)[name = string("q_23_cast_fp16")]; tensor var_745 = const()[name = string("op_745"), val = tensor([1, 1500, 20, -1])]; tensor var_746_cast_fp16 = reshape(shape = var_745, x = k_27_internal_tensor_assign_1_cast_fp16)[name = string("op_746_cast_fp16")]; tensor const_171_to_fp16 = const()[name = string("const_171_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_29_cast_fp16 = mul(x = var_746_cast_fp16, y = const_171_to_fp16)[name = string("k_29_cast_fp16")]; tensor var_752 = const()[name = string("op_752"), val = tensor([1, 1500, 20, -1])]; tensor var_753_cast_fp16 = reshape(shape = var_752, x = v_27_internal_tensor_assign_1_cast_fp16)[name = string("op_753_cast_fp16")]; tensor var_754 = const()[name = string("op_754"), val = tensor([0, 2, 1, 3])]; bool qk_17_transpose_x_0 = const()[name = string("qk_17_transpose_x_0"), val = bool(false)]; bool qk_17_transpose_y_0 = const()[name = string("qk_17_transpose_y_0"), val = bool(false)]; tensor transpose_267_perm_0 = const()[name = string("transpose_267_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_268_perm_0 = const()[name = string("transpose_268_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_268 = transpose(perm = transpose_268_perm_0, x = k_29_cast_fp16)[name = string("transpose_618")]; tensor transpose_267 = transpose(perm = transpose_267_perm_0, x = q_23_cast_fp16)[name = string("transpose_619")]; tensor qk_17_cast_fp16 = matmul(transpose_x = qk_17_transpose_x_0, transpose_y = qk_17_transpose_y_0, x = transpose_267, y = transpose_268)[name = string("qk_17_cast_fp16")]; tensor var_758_cast_fp16 = softmax(axis = var_602, x = qk_17_cast_fp16)[name = string("op_758_cast_fp16")]; bool var_760_transpose_x_0 = const()[name = string("op_760_transpose_x_0"), val = bool(false)]; bool var_760_transpose_y_0 = const()[name = string("op_760_transpose_y_0"), val = bool(false)]; tensor v_29_cast_fp16 = transpose(perm = var_754, x = var_753_cast_fp16)[name = string("transpose_620")]; tensor var_760_cast_fp16 = matmul(transpose_x = var_760_transpose_x_0, transpose_y = var_760_transpose_y_0, x = var_758_cast_fp16, y = v_29_cast_fp16)[name = string("op_760_cast_fp16")]; tensor var_761 = const()[name = string("op_761"), val = tensor([0, 2, 1, 3])]; tensor concat_67x = const()[name = string("concat_67x"), val = tensor([1, -1, 1280])]; tensor var_762_cast_fp16 = transpose(perm = var_761, x = var_760_cast_fp16)[name = string("transpose_617")]; tensor x_49_cast_fp16 = reshape(shape = concat_67x, x = var_762_cast_fp16)[name = string("x_49_cast_fp16")]; tensor var_766_to_fp16 = const()[name = string("op_766_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(246408512)))]; tensor var_767_to_fp16 = const()[name = string("op_767_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249685376)))]; tensor linear_21_cast_fp16 = linear(bias = var_767_to_fp16, weight = var_766_to_fp16, x = x_49_cast_fp16)[name = string("linear_21_cast_fp16")]; tensor x_51_cast_fp16 = add(x = x_45_cast_fp16, y = linear_21_cast_fp16)[name = string("x_51_cast_fp16")]; tensor var_774_axes_0 = const()[name = string("op_774_axes_0"), val = tensor([-1])]; tensor blocks_2_mlp_ln_weight_to_fp16 = const()[name = string("blocks_2_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249688000)))]; tensor blocks_2_mlp_ln_bias_to_fp16 = const()[name = string("blocks_2_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249690624)))]; tensor var_774_cast_fp16 = layer_norm(axes = var_774_axes_0, beta = blocks_2_mlp_ln_bias_to_fp16, epsilon = var_608_to_fp16, gamma = blocks_2_mlp_ln_weight_to_fp16, x = x_51_cast_fp16)[name = string("op_774_cast_fp16")]; tensor var_783_to_fp16 = const()[name = string("op_783_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249693248)))]; tensor var_784_to_fp16 = const()[name = string("op_784_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262800512)))]; tensor linear_22_cast_fp16 = linear(bias = var_784_to_fp16, weight = var_783_to_fp16, x = var_774_cast_fp16)[name = string("linear_22_cast_fp16")]; string x_55_mode_0 = const()[name = string("x_55_mode_0"), val = string("EXACT")]; tensor x_55_cast_fp16 = gelu(mode = x_55_mode_0, x = linear_22_cast_fp16)[name = string("x_55_cast_fp16")]; tensor var_789_to_fp16 = const()[name = string("op_789_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262810816)))]; tensor var_790_to_fp16 = const()[name = string("op_790_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275918080)))]; tensor linear_23_cast_fp16 = linear(bias = var_790_to_fp16, weight = var_789_to_fp16, x = x_55_cast_fp16)[name = string("linear_23_cast_fp16")]; tensor x_57_cast_fp16 = add(x = x_51_cast_fp16, y = linear_23_cast_fp16)[name = string("x_57_cast_fp16")]; tensor k_cache_13_begin_0 = const()[name = string("k_cache_13_begin_0"), val = tensor([3, 0, 0, 0])]; tensor k_cache_13_end_0 = const()[name = string("k_cache_13_end_0"), val = tensor([4, 1, 448, 1280])]; tensor k_cache_13_end_mask_0 = const()[name = string("k_cache_13_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_13_squeeze_mask_0 = const()[name = string("k_cache_13_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_13_cast_fp16 = slice_by_index(begin = k_cache_13_begin_0, end = k_cache_13_end_0, end_mask = k_cache_13_end_mask_0, squeeze_mask = k_cache_13_squeeze_mask_0, x = coreml_update_state_68)[name = string("k_cache_13_cast_fp16")]; tensor v_cache_13_begin_0 = const()[name = string("v_cache_13_begin_0"), val = tensor([3, 0, 0, 0])]; tensor v_cache_13_end_0 = const()[name = string("v_cache_13_end_0"), val = tensor([4, 1, 448, 1280])]; tensor v_cache_13_end_mask_0 = const()[name = string("v_cache_13_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_13_squeeze_mask_0 = const()[name = string("v_cache_13_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_13_cast_fp16 = slice_by_index(begin = v_cache_13_begin_0, end = v_cache_13_end_0, end_mask = v_cache_13_end_mask_0, squeeze_mask = v_cache_13_squeeze_mask_0, x = coreml_update_state_69)[name = string("v_cache_13_cast_fp16")]; tensor k_cache_15_begin_0 = const()[name = string("k_cache_15_begin_0"), val = tensor([3, 0, 0, 0])]; tensor k_cache_15_end_0 = const()[name = string("k_cache_15_end_0"), val = tensor([4, 1, 1500, 1280])]; tensor k_cache_15_end_mask_0 = const()[name = string("k_cache_15_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_15_squeeze_mask_0 = const()[name = string("k_cache_15_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_15_cast_fp16 = slice_by_index(begin = k_cache_15_begin_0, end = k_cache_15_end_0, end_mask = k_cache_15_end_mask_0, squeeze_mask = k_cache_15_squeeze_mask_0, x = read_state_2)[name = string("k_cache_15_cast_fp16")]; tensor v_cache_15_begin_0 = const()[name = string("v_cache_15_begin_0"), val = tensor([3, 0, 0, 0])]; tensor v_cache_15_end_0 = const()[name = string("v_cache_15_end_0"), val = tensor([4, 1, 1500, 1280])]; tensor v_cache_15_end_mask_0 = const()[name = string("v_cache_15_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_15_squeeze_mask_0 = const()[name = string("v_cache_15_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_15_cast_fp16 = slice_by_index(begin = v_cache_15_begin_0, end = v_cache_15_end_0, end_mask = v_cache_15_end_mask_0, squeeze_mask = v_cache_15_squeeze_mask_0, x = read_state_3)[name = string("v_cache_15_cast_fp16")]; int32 var_813 = const()[name = string("op_813"), val = int32(-1)]; tensor var_831_axes_0 = const()[name = string("op_831_axes_0"), val = tensor([-1])]; tensor blocks_3_attn_ln_weight_to_fp16 = const()[name = string("blocks_3_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275920704)))]; tensor blocks_3_attn_ln_bias_to_fp16 = const()[name = string("blocks_3_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275923328)))]; fp16 var_819_to_fp16 = const()[name = string("op_819_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_831_cast_fp16 = layer_norm(axes = var_831_axes_0, beta = blocks_3_attn_ln_bias_to_fp16, epsilon = var_819_to_fp16, gamma = blocks_3_attn_ln_weight_to_fp16, x = x_57_cast_fp16)[name = string("op_831_cast_fp16")]; tensor var_842_to_fp16 = const()[name = string("op_842_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275925952)))]; tensor var_843_to_fp16 = const()[name = string("op_843_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(279202816)))]; tensor linear_24_cast_fp16 = linear(bias = var_843_to_fp16, weight = var_842_to_fp16, x = var_831_cast_fp16)[name = string("linear_24_cast_fp16")]; tensor var_846_to_fp16 = const()[name = string("op_846_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(279205440)))]; tensor linear_25_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_846_to_fp16, x = var_831_cast_fp16)[name = string("linear_25_cast_fp16")]; tensor var_850_to_fp16 = const()[name = string("op_850_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282482304)))]; tensor var_851_to_fp16 = const()[name = string("op_851_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285759168)))]; tensor linear_26_cast_fp16 = linear(bias = var_851_to_fp16, weight = var_850_to_fp16, x = var_831_cast_fp16)[name = string("linear_26_cast_fp16")]; tensor var_853_shape_cast_fp16 = shape(x = linear_24_cast_fp16)[name = string("op_853_shape_cast_fp16")]; int32 gather_38_axis_0 = const()[name = string("gather_38_axis_0"), val = int32(0)]; int32 gather_38_batch_dims_0 = const()[name = string("gather_38_batch_dims_0"), val = int32(0)]; bool gather_38_validate_indices_0 = const()[name = string("gather_38_validate_indices_0"), val = bool(false)]; string var_853_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_853_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_38_to_uint16 = const()[name = string("select_38_to_uint16"), val = uint16(1)]; tensor var_853_shape_cast_fp16_to_uint16 = cast(dtype = var_853_shape_cast_fp16_to_uint16_dtype_0, x = var_853_shape_cast_fp16)[name = string("cast_384")]; uint16 gather_38_cast_uint16 = gather(axis = gather_38_axis_0, batch_dims = gather_38_batch_dims_0, indices = select_38_to_uint16, validate_indices = gather_38_validate_indices_0, x = var_853_shape_cast_fp16_to_uint16)[name = string("gather_38_cast_uint16")]; string gather_38_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_38_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_38_cast_uint16_to_int32 = cast(dtype = gather_38_cast_uint16_to_int32_dtype_0, x = gather_38_cast_uint16)[name = string("cast_383")]; int32 end_step_9 = add(x = offset, y = gather_38_cast_uint16_to_int32)[name = string("end_step_9")]; tensor expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor([0])]; tensor expand_dims_50 = const()[name = string("expand_dims_50"), val = tensor([0])]; tensor expand_dims_51_axes_0 = const()[name = string("expand_dims_51_axes_0"), val = tensor([0])]; tensor expand_dims_51 = expand_dims(axes = expand_dims_51_axes_0, x = end_step_9)[name = string("expand_dims_51")]; tensor concat_70_values0_0 = const()[name = string("concat_70_values0_0"), val = tensor([3])]; int32 concat_70_axis_0 = const()[name = string("concat_70_axis_0"), val = int32(0)]; bool concat_70_interleave_0 = const()[name = string("concat_70_interleave_0"), val = bool(false)]; tensor concat_70 = concat(axis = concat_70_axis_0, interleave = concat_70_interleave_0, values = (concat_70_values0_0, expand_dims_48, expand_dims_1, expand_dims_50))[name = string("concat_70")]; tensor concat_71_values0_0 = const()[name = string("concat_71_values0_0"), val = tensor([0])]; tensor concat_71_values1_0 = const()[name = string("concat_71_values1_0"), val = tensor([0])]; tensor concat_71_values3_0 = const()[name = string("concat_71_values3_0"), val = tensor([0])]; int32 concat_71_axis_0 = const()[name = string("concat_71_axis_0"), val = int32(0)]; bool concat_71_interleave_0 = const()[name = string("concat_71_interleave_0"), val = bool(false)]; tensor concat_71 = concat(axis = concat_71_axis_0, interleave = concat_71_interleave_0, values = (concat_71_values0_0, concat_71_values1_0, expand_dims_51, concat_71_values3_0))[name = string("concat_71")]; tensor k_cache1_internal_tensor_assign_4_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_4_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_4_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_70, begin_mask = k_cache1_internal_tensor_assign_4_begin_mask_0, end = concat_71, end_mask = k_cache1_internal_tensor_assign_4_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_4_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_4_stride_0, update = linear_25_cast_fp16, x = coreml_update_state_68)[name = string("k_cache1_internal_tensor_assign_4_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_4_cast_fp16, input = k_cache1)[name = string("coreml_update_state_70_write_state")]; tensor coreml_update_state_70 = read_state(input = k_cache1)[name = string("coreml_update_state_70")]; tensor v_cache1_internal_tensor_assign_4_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_4_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_4_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_70, begin_mask = v_cache1_internal_tensor_assign_4_begin_mask_0, end = concat_71, end_mask = v_cache1_internal_tensor_assign_4_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_4_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_4_stride_0, update = linear_26_cast_fp16, x = coreml_update_state_69)[name = string("v_cache1_internal_tensor_assign_4_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_4_cast_fp16, input = v_cache1)[name = string("coreml_update_state_71_write_state")]; tensor coreml_update_state_71 = read_state(input = v_cache1)[name = string("coreml_update_state_71")]; int32 concat_76_values0_0 = const()[name = string("concat_76_values0_0"), val = int32(1)]; int32 concat_76_values2_0 = const()[name = string("concat_76_values2_0"), val = int32(1280)]; int32 concat_76_axis_0 = const()[name = string("concat_76_axis_0"), val = int32(0)]; bool concat_76_interleave_0 = const()[name = string("concat_76_interleave_0"), val = bool(false)]; tensor concat_76 = concat(axis = concat_76_axis_0, interleave = concat_76_interleave_0, values = (concat_76_values0_0, end_step_9, concat_76_values2_0))[name = string("concat_76")]; tensor var_869_begin_0 = const()[name = string("op_869_begin_0"), val = tensor([0, 0, 0])]; tensor var_869_end_mask_0 = const()[name = string("op_869_end_mask_0"), val = tensor([true, false, true])]; tensor var_869_cast_fp16 = slice_by_index(begin = var_869_begin_0, end = concat_76, end_mask = var_869_end_mask_0, x = k_cache_13_cast_fp16)[name = string("op_869_cast_fp16")]; tensor var_872_begin_0 = const()[name = string("op_872_begin_0"), val = tensor([0, 0, 0])]; tensor var_872_end_mask_0 = const()[name = string("op_872_end_mask_0"), val = tensor([true, false, true])]; tensor var_872_cast_fp16 = slice_by_index(begin = var_872_begin_0, end = concat_76, end_mask = var_872_end_mask_0, x = v_cache_13_cast_fp16)[name = string("op_872_cast_fp16")]; tensor concat_78x = const()[name = string("concat_78x"), val = tensor([1, -1, 20, 64])]; tensor var_882_cast_fp16 = reshape(shape = concat_78x, x = linear_24_cast_fp16)[name = string("op_882_cast_fp16")]; tensor const_172_to_fp16 = const()[name = string("const_172_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_27_cast_fp16 = mul(x = var_882_cast_fp16, y = const_172_to_fp16)[name = string("q_27_cast_fp16")]; tensor concat_79x = const()[name = string("concat_79x"), val = tensor([1, -1, 20, 64])]; tensor var_889_cast_fp16 = reshape(shape = concat_79x, x = var_869_cast_fp16)[name = string("op_889_cast_fp16")]; tensor const_173_to_fp16 = const()[name = string("const_173_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_35_cast_fp16 = mul(x = var_889_cast_fp16, y = const_173_to_fp16)[name = string("k_35_cast_fp16")]; tensor concat_80x = const()[name = string("concat_80x"), val = tensor([1, -1, 20, 64])]; tensor var_896_cast_fp16 = reshape(shape = concat_80x, x = var_872_cast_fp16)[name = string("op_896_cast_fp16")]; tensor var_897 = const()[name = string("op_897"), val = tensor([0, 2, 1, 3])]; bool qk_19_transpose_x_0 = const()[name = string("qk_19_transpose_x_0"), val = bool(false)]; bool qk_19_transpose_y_0 = const()[name = string("qk_19_transpose_y_0"), val = bool(false)]; tensor transpose_269_perm_0 = const()[name = string("transpose_269_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_270_perm_0 = const()[name = string("transpose_270_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_270 = transpose(perm = transpose_270_perm_0, x = k_35_cast_fp16)[name = string("transpose_614")]; tensor transpose_269 = transpose(perm = transpose_269_perm_0, x = q_27_cast_fp16)[name = string("transpose_615")]; tensor qk_19_cast_fp16 = matmul(transpose_x = qk_19_transpose_x_0, transpose_y = qk_19_transpose_y_0, x = transpose_269, y = transpose_270)[name = string("qk_19_cast_fp16")]; int32 concat_81_values1_0 = const()[name = string("concat_81_values1_0"), val = int32(448)]; int32 concat_81_axis_0 = const()[name = string("concat_81_axis_0"), val = int32(0)]; bool concat_81_interleave_0 = const()[name = string("concat_81_interleave_0"), val = bool(false)]; tensor concat_81 = concat(axis = concat_81_axis_0, interleave = concat_81_interleave_0, values = (gather_38_cast_uint16_to_int32, concat_81_values1_0))[name = string("concat_81")]; tensor var_900_begin_0 = const()[name = string("op_900_begin_0"), val = tensor([0, 0])]; tensor var_900_end_mask_0 = const()[name = string("op_900_end_mask_0"), val = tensor([false, true])]; tensor var_900_cast_fp16 = slice_by_index(begin = var_900_begin_0, end = concat_81, end_mask = var_900_end_mask_0, x = mask_to_fp16)[name = string("op_900_cast_fp16")]; int32 concat_82_values0_0 = const()[name = string("concat_82_values0_0"), val = int32(0)]; int32 concat_82_axis_0 = const()[name = string("concat_82_axis_0"), val = int32(0)]; bool concat_82_interleave_0 = const()[name = string("concat_82_interleave_0"), val = bool(false)]; tensor concat_82 = concat(axis = concat_82_axis_0, interleave = concat_82_interleave_0, values = (concat_82_values0_0, gather_38_cast_uint16_to_int32))[name = string("concat_82")]; tensor var_901_begin_0 = const()[name = string("op_901_begin_0"), val = tensor([0, 0])]; tensor var_901_end_mask_0 = const()[name = string("op_901_end_mask_0"), val = tensor([true, false])]; tensor var_901_cast_fp16 = slice_by_index(begin = var_901_begin_0, end = concat_82, end_mask = var_901_end_mask_0, x = var_900_cast_fp16)[name = string("op_901_cast_fp16")]; tensor qk_21_cast_fp16 = add(x = qk_19_cast_fp16, y = var_901_cast_fp16)[name = string("qk_21_cast_fp16")]; tensor var_904_cast_fp16 = softmax(axis = var_813, x = qk_21_cast_fp16)[name = string("op_904_cast_fp16")]; bool var_906_transpose_x_0 = const()[name = string("op_906_transpose_x_0"), val = bool(false)]; bool var_906_transpose_y_0 = const()[name = string("op_906_transpose_y_0"), val = bool(false)]; tensor v_35_cast_fp16 = transpose(perm = var_897, x = var_896_cast_fp16)[name = string("transpose_616")]; tensor var_906_cast_fp16 = matmul(transpose_x = var_906_transpose_x_0, transpose_y = var_906_transpose_y_0, x = var_904_cast_fp16, y = v_35_cast_fp16)[name = string("op_906_cast_fp16")]; tensor var_907 = const()[name = string("op_907"), val = tensor([0, 2, 1, 3])]; tensor concat_83x = const()[name = string("concat_83x"), val = tensor([1, -1, 1280])]; tensor var_908_cast_fp16 = transpose(perm = var_907, x = var_906_cast_fp16)[name = string("transpose_613")]; tensor x_61_cast_fp16 = reshape(shape = concat_83x, x = var_908_cast_fp16)[name = string("x_61_cast_fp16")]; tensor var_912_to_fp16 = const()[name = string("op_912_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285761792)))]; tensor var_913_to_fp16 = const()[name = string("op_913_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(289038656)))]; tensor linear_27_cast_fp16 = linear(bias = var_913_to_fp16, weight = var_912_to_fp16, x = x_61_cast_fp16)[name = string("linear_27_cast_fp16")]; tensor x_63_cast_fp16 = add(x = x_57_cast_fp16, y = linear_27_cast_fp16)[name = string("x_63_cast_fp16")]; tensor var_920_axes_0 = const()[name = string("op_920_axes_0"), val = tensor([-1])]; tensor blocks_3_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_3_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(289041280)))]; tensor blocks_3_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_3_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(289043904)))]; tensor var_920_cast_fp16 = layer_norm(axes = var_920_axes_0, beta = blocks_3_cross_attn_ln_bias_to_fp16, epsilon = var_819_to_fp16, gamma = blocks_3_cross_attn_ln_weight_to_fp16, x = x_63_cast_fp16)[name = string("op_920_cast_fp16")]; tensor var_929_to_fp16 = const()[name = string("op_929_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(289046528)))]; tensor var_930_to_fp16 = const()[name = string("op_930_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(292323392)))]; tensor linear_28_cast_fp16 = linear(bias = var_930_to_fp16, weight = var_929_to_fp16, x = var_920_cast_fp16)[name = string("linear_28_cast_fp16")]; tensor concat_84 = const()[name = string("concat_84"), val = tensor([0, 0, 0])]; tensor concat_85 = const()[name = string("concat_85"), val = tensor([0, 1500, 0])]; tensor k_37_internal_tensor_assign_1_stride_0 = const()[name = string("k_37_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_37_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_37_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_37_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_37_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_84, begin_mask = k_37_internal_tensor_assign_1_begin_mask_0, end = concat_85, end_mask = k_37_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_37_internal_tensor_assign_1_squeeze_mask_0, stride = k_37_internal_tensor_assign_1_stride_0, update = k_cache_15_cast_fp16, x = k_7_to_fp16)[name = string("k_37_internal_tensor_assign_1_cast_fp16")]; tensor concat_86 = const()[name = string("concat_86"), val = tensor([0, 0, 0])]; tensor concat_87 = const()[name = string("concat_87"), val = tensor([0, 1500, 0])]; tensor v_37_internal_tensor_assign_1_stride_0 = const()[name = string("v_37_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_37_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_37_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_37_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_37_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_86, begin_mask = v_37_internal_tensor_assign_1_begin_mask_0, end = concat_87, end_mask = v_37_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_37_internal_tensor_assign_1_squeeze_mask_0, stride = v_37_internal_tensor_assign_1_stride_0, update = v_cache_15_cast_fp16, x = k_7_to_fp16)[name = string("v_37_internal_tensor_assign_1_cast_fp16")]; tensor concat_88x = const()[name = string("concat_88x"), val = tensor([1, -1, 20, 64])]; tensor var_950_cast_fp16 = reshape(shape = concat_88x, x = linear_28_cast_fp16)[name = string("op_950_cast_fp16")]; tensor const_174_to_fp16 = const()[name = string("const_174_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_31_cast_fp16 = mul(x = var_950_cast_fp16, y = const_174_to_fp16)[name = string("q_31_cast_fp16")]; tensor var_956 = const()[name = string("op_956"), val = tensor([1, 1500, 20, -1])]; tensor var_957_cast_fp16 = reshape(shape = var_956, x = k_37_internal_tensor_assign_1_cast_fp16)[name = string("op_957_cast_fp16")]; tensor const_175_to_fp16 = const()[name = string("const_175_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_39_cast_fp16 = mul(x = var_957_cast_fp16, y = const_175_to_fp16)[name = string("k_39_cast_fp16")]; tensor var_963 = const()[name = string("op_963"), val = tensor([1, 1500, 20, -1])]; tensor var_964_cast_fp16 = reshape(shape = var_963, x = v_37_internal_tensor_assign_1_cast_fp16)[name = string("op_964_cast_fp16")]; tensor var_965 = const()[name = string("op_965"), val = tensor([0, 2, 1, 3])]; bool qk_23_transpose_x_0 = const()[name = string("qk_23_transpose_x_0"), val = bool(false)]; bool qk_23_transpose_y_0 = const()[name = string("qk_23_transpose_y_0"), val = bool(false)]; tensor transpose_271_perm_0 = const()[name = string("transpose_271_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_272_perm_0 = const()[name = string("transpose_272_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_272 = transpose(perm = transpose_272_perm_0, x = k_39_cast_fp16)[name = string("transpose_610")]; tensor transpose_271 = transpose(perm = transpose_271_perm_0, x = q_31_cast_fp16)[name = string("transpose_611")]; tensor qk_23_cast_fp16 = matmul(transpose_x = qk_23_transpose_x_0, transpose_y = qk_23_transpose_y_0, x = transpose_271, y = transpose_272)[name = string("qk_23_cast_fp16")]; tensor var_969_cast_fp16 = softmax(axis = var_813, x = qk_23_cast_fp16)[name = string("op_969_cast_fp16")]; bool var_971_transpose_x_0 = const()[name = string("op_971_transpose_x_0"), val = bool(false)]; bool var_971_transpose_y_0 = const()[name = string("op_971_transpose_y_0"), val = bool(false)]; tensor v_39_cast_fp16 = transpose(perm = var_965, x = var_964_cast_fp16)[name = string("transpose_612")]; tensor var_971_cast_fp16 = matmul(transpose_x = var_971_transpose_x_0, transpose_y = var_971_transpose_y_0, x = var_969_cast_fp16, y = v_39_cast_fp16)[name = string("op_971_cast_fp16")]; tensor var_972 = const()[name = string("op_972"), val = tensor([0, 2, 1, 3])]; tensor concat_89x = const()[name = string("concat_89x"), val = tensor([1, -1, 1280])]; tensor var_973_cast_fp16 = transpose(perm = var_972, x = var_971_cast_fp16)[name = string("transpose_609")]; tensor x_67_cast_fp16 = reshape(shape = concat_89x, x = var_973_cast_fp16)[name = string("x_67_cast_fp16")]; tensor var_977_to_fp16 = const()[name = string("op_977_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(292326016)))]; tensor var_978_to_fp16 = const()[name = string("op_978_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295602880)))]; tensor linear_29_cast_fp16 = linear(bias = var_978_to_fp16, weight = var_977_to_fp16, x = x_67_cast_fp16)[name = string("linear_29_cast_fp16")]; tensor x_69_cast_fp16 = add(x = x_63_cast_fp16, y = linear_29_cast_fp16)[name = string("x_69_cast_fp16")]; tensor var_985_axes_0 = const()[name = string("op_985_axes_0"), val = tensor([-1])]; tensor blocks_3_mlp_ln_weight_to_fp16 = const()[name = string("blocks_3_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295605504)))]; tensor blocks_3_mlp_ln_bias_to_fp16 = const()[name = string("blocks_3_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295608128)))]; tensor var_985_cast_fp16 = layer_norm(axes = var_985_axes_0, beta = blocks_3_mlp_ln_bias_to_fp16, epsilon = var_819_to_fp16, gamma = blocks_3_mlp_ln_weight_to_fp16, x = x_69_cast_fp16)[name = string("op_985_cast_fp16")]; tensor var_994_to_fp16 = const()[name = string("op_994_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295610752)))]; tensor var_995_to_fp16 = const()[name = string("op_995_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(308718016)))]; tensor linear_30_cast_fp16 = linear(bias = var_995_to_fp16, weight = var_994_to_fp16, x = var_985_cast_fp16)[name = string("linear_30_cast_fp16")]; string x_73_mode_0 = const()[name = string("x_73_mode_0"), val = string("EXACT")]; tensor x_73_cast_fp16 = gelu(mode = x_73_mode_0, x = linear_30_cast_fp16)[name = string("x_73_cast_fp16")]; tensor var_1000_to_fp16 = const()[name = string("op_1000_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(308728320)))]; tensor var_1001_to_fp16 = const()[name = string("op_1001_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321835584)))]; tensor linear_31_cast_fp16 = linear(bias = var_1001_to_fp16, weight = var_1000_to_fp16, x = x_73_cast_fp16)[name = string("linear_31_cast_fp16")]; tensor x_75_cast_fp16 = add(x = x_69_cast_fp16, y = linear_31_cast_fp16)[name = string("x_75_cast_fp16")]; tensor k_cache_17_begin_0 = const()[name = string("k_cache_17_begin_0"), val = tensor([4, 0, 0, 0])]; tensor k_cache_17_end_0 = const()[name = string("k_cache_17_end_0"), val = tensor([5, 1, 448, 1280])]; tensor k_cache_17_end_mask_0 = const()[name = string("k_cache_17_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_17_squeeze_mask_0 = const()[name = string("k_cache_17_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_17_cast_fp16 = slice_by_index(begin = k_cache_17_begin_0, end = k_cache_17_end_0, end_mask = k_cache_17_end_mask_0, squeeze_mask = k_cache_17_squeeze_mask_0, x = coreml_update_state_70)[name = string("k_cache_17_cast_fp16")]; tensor v_cache_17_begin_0 = const()[name = string("v_cache_17_begin_0"), val = tensor([4, 0, 0, 0])]; tensor v_cache_17_end_0 = const()[name = string("v_cache_17_end_0"), val = tensor([5, 1, 448, 1280])]; tensor v_cache_17_end_mask_0 = const()[name = string("v_cache_17_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_17_squeeze_mask_0 = const()[name = string("v_cache_17_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_17_cast_fp16 = slice_by_index(begin = v_cache_17_begin_0, end = v_cache_17_end_0, end_mask = v_cache_17_end_mask_0, squeeze_mask = v_cache_17_squeeze_mask_0, x = coreml_update_state_71)[name = string("v_cache_17_cast_fp16")]; tensor k_cache_19_begin_0 = const()[name = string("k_cache_19_begin_0"), val = tensor([4, 0, 0, 0])]; tensor k_cache_19_end_0 = const()[name = string("k_cache_19_end_0"), val = tensor([5, 1, 1500, 1280])]; tensor k_cache_19_end_mask_0 = const()[name = string("k_cache_19_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_19_squeeze_mask_0 = const()[name = string("k_cache_19_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_19_cast_fp16 = slice_by_index(begin = k_cache_19_begin_0, end = k_cache_19_end_0, end_mask = k_cache_19_end_mask_0, squeeze_mask = k_cache_19_squeeze_mask_0, x = read_state_2)[name = string("k_cache_19_cast_fp16")]; tensor v_cache_19_begin_0 = const()[name = string("v_cache_19_begin_0"), val = tensor([4, 0, 0, 0])]; tensor v_cache_19_end_0 = const()[name = string("v_cache_19_end_0"), val = tensor([5, 1, 1500, 1280])]; tensor v_cache_19_end_mask_0 = const()[name = string("v_cache_19_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_19_squeeze_mask_0 = const()[name = string("v_cache_19_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_19_cast_fp16 = slice_by_index(begin = v_cache_19_begin_0, end = v_cache_19_end_0, end_mask = v_cache_19_end_mask_0, squeeze_mask = v_cache_19_squeeze_mask_0, x = read_state_3)[name = string("v_cache_19_cast_fp16")]; int32 var_1024 = const()[name = string("op_1024"), val = int32(-1)]; tensor var_1042_axes_0 = const()[name = string("op_1042_axes_0"), val = tensor([-1])]; tensor blocks_4_attn_ln_weight_to_fp16 = const()[name = string("blocks_4_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321838208)))]; tensor blocks_4_attn_ln_bias_to_fp16 = const()[name = string("blocks_4_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321840832)))]; fp16 var_1030_to_fp16 = const()[name = string("op_1030_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_1042_cast_fp16 = layer_norm(axes = var_1042_axes_0, beta = blocks_4_attn_ln_bias_to_fp16, epsilon = var_1030_to_fp16, gamma = blocks_4_attn_ln_weight_to_fp16, x = x_75_cast_fp16)[name = string("op_1042_cast_fp16")]; tensor var_1053_to_fp16 = const()[name = string("op_1053_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321843456)))]; tensor var_1054_to_fp16 = const()[name = string("op_1054_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325120320)))]; tensor linear_32_cast_fp16 = linear(bias = var_1054_to_fp16, weight = var_1053_to_fp16, x = var_1042_cast_fp16)[name = string("linear_32_cast_fp16")]; tensor var_1057_to_fp16 = const()[name = string("op_1057_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325122944)))]; tensor linear_33_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1057_to_fp16, x = var_1042_cast_fp16)[name = string("linear_33_cast_fp16")]; tensor var_1061_to_fp16 = const()[name = string("op_1061_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(328399808)))]; tensor var_1062_to_fp16 = const()[name = string("op_1062_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(331676672)))]; tensor linear_34_cast_fp16 = linear(bias = var_1062_to_fp16, weight = var_1061_to_fp16, x = var_1042_cast_fp16)[name = string("linear_34_cast_fp16")]; tensor var_1064_shape_cast_fp16 = shape(x = linear_32_cast_fp16)[name = string("op_1064_shape_cast_fp16")]; int32 gather_50_axis_0 = const()[name = string("gather_50_axis_0"), val = int32(0)]; int32 gather_50_batch_dims_0 = const()[name = string("gather_50_batch_dims_0"), val = int32(0)]; bool gather_50_validate_indices_0 = const()[name = string("gather_50_validate_indices_0"), val = bool(false)]; string var_1064_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1064_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_50_to_uint16 = const()[name = string("select_50_to_uint16"), val = uint16(1)]; tensor var_1064_shape_cast_fp16_to_uint16 = cast(dtype = var_1064_shape_cast_fp16_to_uint16_dtype_0, x = var_1064_shape_cast_fp16)[name = string("cast_382")]; uint16 gather_50_cast_uint16 = gather(axis = gather_50_axis_0, batch_dims = gather_50_batch_dims_0, indices = select_50_to_uint16, validate_indices = gather_50_validate_indices_0, x = var_1064_shape_cast_fp16_to_uint16)[name = string("gather_50_cast_uint16")]; string gather_50_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_50_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_50_cast_uint16_to_int32 = cast(dtype = gather_50_cast_uint16_to_int32_dtype_0, x = gather_50_cast_uint16)[name = string("cast_381")]; int32 end_step_11 = add(x = offset, y = gather_50_cast_uint16_to_int32)[name = string("end_step_11")]; tensor expand_dims_64 = const()[name = string("expand_dims_64"), val = tensor([0])]; tensor expand_dims_66 = const()[name = string("expand_dims_66"), val = tensor([0])]; tensor expand_dims_67_axes_0 = const()[name = string("expand_dims_67_axes_0"), val = tensor([0])]; tensor expand_dims_67 = expand_dims(axes = expand_dims_67_axes_0, x = end_step_11)[name = string("expand_dims_67")]; tensor concat_92_values0_0 = const()[name = string("concat_92_values0_0"), val = tensor([4])]; int32 concat_92_axis_0 = const()[name = string("concat_92_axis_0"), val = int32(0)]; bool concat_92_interleave_0 = const()[name = string("concat_92_interleave_0"), val = bool(false)]; tensor concat_92 = concat(axis = concat_92_axis_0, interleave = concat_92_interleave_0, values = (concat_92_values0_0, expand_dims_64, expand_dims_1, expand_dims_66))[name = string("concat_92")]; tensor concat_93_values0_0 = const()[name = string("concat_93_values0_0"), val = tensor([0])]; tensor concat_93_values1_0 = const()[name = string("concat_93_values1_0"), val = tensor([0])]; tensor concat_93_values3_0 = const()[name = string("concat_93_values3_0"), val = tensor([0])]; int32 concat_93_axis_0 = const()[name = string("concat_93_axis_0"), val = int32(0)]; bool concat_93_interleave_0 = const()[name = string("concat_93_interleave_0"), val = bool(false)]; tensor concat_93 = concat(axis = concat_93_axis_0, interleave = concat_93_interleave_0, values = (concat_93_values0_0, concat_93_values1_0, expand_dims_67, concat_93_values3_0))[name = string("concat_93")]; tensor k_cache1_internal_tensor_assign_5_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_5_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_5_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_92, begin_mask = k_cache1_internal_tensor_assign_5_begin_mask_0, end = concat_93, end_mask = k_cache1_internal_tensor_assign_5_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_5_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_5_stride_0, update = linear_33_cast_fp16, x = coreml_update_state_70)[name = string("k_cache1_internal_tensor_assign_5_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_5_cast_fp16, input = k_cache1)[name = string("coreml_update_state_72_write_state")]; tensor coreml_update_state_72 = read_state(input = k_cache1)[name = string("coreml_update_state_72")]; tensor v_cache1_internal_tensor_assign_5_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_5_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_5_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_92, begin_mask = v_cache1_internal_tensor_assign_5_begin_mask_0, end = concat_93, end_mask = v_cache1_internal_tensor_assign_5_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_5_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_5_stride_0, update = linear_34_cast_fp16, x = coreml_update_state_71)[name = string("v_cache1_internal_tensor_assign_5_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_5_cast_fp16, input = v_cache1)[name = string("coreml_update_state_73_write_state")]; tensor coreml_update_state_73 = read_state(input = v_cache1)[name = string("coreml_update_state_73")]; int32 concat_98_values0_0 = const()[name = string("concat_98_values0_0"), val = int32(1)]; int32 concat_98_values2_0 = const()[name = string("concat_98_values2_0"), val = int32(1280)]; int32 concat_98_axis_0 = const()[name = string("concat_98_axis_0"), val = int32(0)]; bool concat_98_interleave_0 = const()[name = string("concat_98_interleave_0"), val = bool(false)]; tensor concat_98 = concat(axis = concat_98_axis_0, interleave = concat_98_interleave_0, values = (concat_98_values0_0, end_step_11, concat_98_values2_0))[name = string("concat_98")]; tensor var_1080_begin_0 = const()[name = string("op_1080_begin_0"), val = tensor([0, 0, 0])]; tensor var_1080_end_mask_0 = const()[name = string("op_1080_end_mask_0"), val = tensor([true, false, true])]; tensor var_1080_cast_fp16 = slice_by_index(begin = var_1080_begin_0, end = concat_98, end_mask = var_1080_end_mask_0, x = k_cache_17_cast_fp16)[name = string("op_1080_cast_fp16")]; tensor var_1083_begin_0 = const()[name = string("op_1083_begin_0"), val = tensor([0, 0, 0])]; tensor var_1083_end_mask_0 = const()[name = string("op_1083_end_mask_0"), val = tensor([true, false, true])]; tensor var_1083_cast_fp16 = slice_by_index(begin = var_1083_begin_0, end = concat_98, end_mask = var_1083_end_mask_0, x = v_cache_17_cast_fp16)[name = string("op_1083_cast_fp16")]; tensor concat_100x = const()[name = string("concat_100x"), val = tensor([1, -1, 20, 64])]; tensor var_1093_cast_fp16 = reshape(shape = concat_100x, x = linear_32_cast_fp16)[name = string("op_1093_cast_fp16")]; tensor const_176_to_fp16 = const()[name = string("const_176_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_35_cast_fp16 = mul(x = var_1093_cast_fp16, y = const_176_to_fp16)[name = string("q_35_cast_fp16")]; tensor concat_101x = const()[name = string("concat_101x"), val = tensor([1, -1, 20, 64])]; tensor var_1100_cast_fp16 = reshape(shape = concat_101x, x = var_1080_cast_fp16)[name = string("op_1100_cast_fp16")]; tensor const_177_to_fp16 = const()[name = string("const_177_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_45_cast_fp16 = mul(x = var_1100_cast_fp16, y = const_177_to_fp16)[name = string("k_45_cast_fp16")]; tensor concat_102x = const()[name = string("concat_102x"), val = tensor([1, -1, 20, 64])]; tensor var_1107_cast_fp16 = reshape(shape = concat_102x, x = var_1083_cast_fp16)[name = string("op_1107_cast_fp16")]; tensor var_1108 = const()[name = string("op_1108"), val = tensor([0, 2, 1, 3])]; bool qk_25_transpose_x_0 = const()[name = string("qk_25_transpose_x_0"), val = bool(false)]; bool qk_25_transpose_y_0 = const()[name = string("qk_25_transpose_y_0"), val = bool(false)]; tensor transpose_273_perm_0 = const()[name = string("transpose_273_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_274_perm_0 = const()[name = string("transpose_274_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_274 = transpose(perm = transpose_274_perm_0, x = k_45_cast_fp16)[name = string("transpose_606")]; tensor transpose_273 = transpose(perm = transpose_273_perm_0, x = q_35_cast_fp16)[name = string("transpose_607")]; tensor qk_25_cast_fp16 = matmul(transpose_x = qk_25_transpose_x_0, transpose_y = qk_25_transpose_y_0, x = transpose_273, y = transpose_274)[name = string("qk_25_cast_fp16")]; int32 concat_103_values1_0 = const()[name = string("concat_103_values1_0"), val = int32(448)]; int32 concat_103_axis_0 = const()[name = string("concat_103_axis_0"), val = int32(0)]; bool concat_103_interleave_0 = const()[name = string("concat_103_interleave_0"), val = bool(false)]; tensor concat_103 = concat(axis = concat_103_axis_0, interleave = concat_103_interleave_0, values = (gather_50_cast_uint16_to_int32, concat_103_values1_0))[name = string("concat_103")]; tensor var_1111_begin_0 = const()[name = string("op_1111_begin_0"), val = tensor([0, 0])]; tensor var_1111_end_mask_0 = const()[name = string("op_1111_end_mask_0"), val = tensor([false, true])]; tensor var_1111_cast_fp16 = slice_by_index(begin = var_1111_begin_0, end = concat_103, end_mask = var_1111_end_mask_0, x = mask_to_fp16)[name = string("op_1111_cast_fp16")]; int32 concat_104_values0_0 = const()[name = string("concat_104_values0_0"), val = int32(0)]; int32 concat_104_axis_0 = const()[name = string("concat_104_axis_0"), val = int32(0)]; bool concat_104_interleave_0 = const()[name = string("concat_104_interleave_0"), val = bool(false)]; tensor concat_104 = concat(axis = concat_104_axis_0, interleave = concat_104_interleave_0, values = (concat_104_values0_0, gather_50_cast_uint16_to_int32))[name = string("concat_104")]; tensor var_1112_begin_0 = const()[name = string("op_1112_begin_0"), val = tensor([0, 0])]; tensor var_1112_end_mask_0 = const()[name = string("op_1112_end_mask_0"), val = tensor([true, false])]; tensor var_1112_cast_fp16 = slice_by_index(begin = var_1112_begin_0, end = concat_104, end_mask = var_1112_end_mask_0, x = var_1111_cast_fp16)[name = string("op_1112_cast_fp16")]; tensor qk_27_cast_fp16 = add(x = qk_25_cast_fp16, y = var_1112_cast_fp16)[name = string("qk_27_cast_fp16")]; tensor var_1115_cast_fp16 = softmax(axis = var_1024, x = qk_27_cast_fp16)[name = string("op_1115_cast_fp16")]; bool var_1117_transpose_x_0 = const()[name = string("op_1117_transpose_x_0"), val = bool(false)]; bool var_1117_transpose_y_0 = const()[name = string("op_1117_transpose_y_0"), val = bool(false)]; tensor v_45_cast_fp16 = transpose(perm = var_1108, x = var_1107_cast_fp16)[name = string("transpose_608")]; tensor var_1117_cast_fp16 = matmul(transpose_x = var_1117_transpose_x_0, transpose_y = var_1117_transpose_y_0, x = var_1115_cast_fp16, y = v_45_cast_fp16)[name = string("op_1117_cast_fp16")]; tensor var_1118 = const()[name = string("op_1118"), val = tensor([0, 2, 1, 3])]; tensor concat_105x = const()[name = string("concat_105x"), val = tensor([1, -1, 1280])]; tensor var_1119_cast_fp16 = transpose(perm = var_1118, x = var_1117_cast_fp16)[name = string("transpose_605")]; tensor x_79_cast_fp16 = reshape(shape = concat_105x, x = var_1119_cast_fp16)[name = string("x_79_cast_fp16")]; tensor var_1123_to_fp16 = const()[name = string("op_1123_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(331679296)))]; tensor var_1124_to_fp16 = const()[name = string("op_1124_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334956160)))]; tensor linear_35_cast_fp16 = linear(bias = var_1124_to_fp16, weight = var_1123_to_fp16, x = x_79_cast_fp16)[name = string("linear_35_cast_fp16")]; tensor x_81_cast_fp16 = add(x = x_75_cast_fp16, y = linear_35_cast_fp16)[name = string("x_81_cast_fp16")]; tensor var_1131_axes_0 = const()[name = string("op_1131_axes_0"), val = tensor([-1])]; tensor blocks_4_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_4_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334958784)))]; tensor blocks_4_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_4_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334961408)))]; tensor var_1131_cast_fp16 = layer_norm(axes = var_1131_axes_0, beta = blocks_4_cross_attn_ln_bias_to_fp16, epsilon = var_1030_to_fp16, gamma = blocks_4_cross_attn_ln_weight_to_fp16, x = x_81_cast_fp16)[name = string("op_1131_cast_fp16")]; tensor var_1140_to_fp16 = const()[name = string("op_1140_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334964032)))]; tensor var_1141_to_fp16 = const()[name = string("op_1141_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338240896)))]; tensor linear_36_cast_fp16 = linear(bias = var_1141_to_fp16, weight = var_1140_to_fp16, x = var_1131_cast_fp16)[name = string("linear_36_cast_fp16")]; tensor concat_106 = const()[name = string("concat_106"), val = tensor([0, 0, 0])]; tensor concat_107 = const()[name = string("concat_107"), val = tensor([0, 1500, 0])]; tensor k_47_internal_tensor_assign_1_stride_0 = const()[name = string("k_47_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_47_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_47_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_47_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_47_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_47_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_47_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_47_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_106, begin_mask = k_47_internal_tensor_assign_1_begin_mask_0, end = concat_107, end_mask = k_47_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_47_internal_tensor_assign_1_squeeze_mask_0, stride = k_47_internal_tensor_assign_1_stride_0, update = k_cache_19_cast_fp16, x = k_7_to_fp16)[name = string("k_47_internal_tensor_assign_1_cast_fp16")]; tensor concat_108 = const()[name = string("concat_108"), val = tensor([0, 0, 0])]; tensor concat_109 = const()[name = string("concat_109"), val = tensor([0, 1500, 0])]; tensor v_47_internal_tensor_assign_1_stride_0 = const()[name = string("v_47_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_47_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_47_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_47_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_47_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_47_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_47_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_47_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_108, begin_mask = v_47_internal_tensor_assign_1_begin_mask_0, end = concat_109, end_mask = v_47_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_47_internal_tensor_assign_1_squeeze_mask_0, stride = v_47_internal_tensor_assign_1_stride_0, update = v_cache_19_cast_fp16, x = k_7_to_fp16)[name = string("v_47_internal_tensor_assign_1_cast_fp16")]; tensor concat_110x = const()[name = string("concat_110x"), val = tensor([1, -1, 20, 64])]; tensor var_1161_cast_fp16 = reshape(shape = concat_110x, x = linear_36_cast_fp16)[name = string("op_1161_cast_fp16")]; tensor const_178_to_fp16 = const()[name = string("const_178_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_39_cast_fp16 = mul(x = var_1161_cast_fp16, y = const_178_to_fp16)[name = string("q_39_cast_fp16")]; tensor var_1167 = const()[name = string("op_1167"), val = tensor([1, 1500, 20, -1])]; tensor var_1168_cast_fp16 = reshape(shape = var_1167, x = k_47_internal_tensor_assign_1_cast_fp16)[name = string("op_1168_cast_fp16")]; tensor const_179_to_fp16 = const()[name = string("const_179_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_49_cast_fp16 = mul(x = var_1168_cast_fp16, y = const_179_to_fp16)[name = string("k_49_cast_fp16")]; tensor var_1174 = const()[name = string("op_1174"), val = tensor([1, 1500, 20, -1])]; tensor var_1175_cast_fp16 = reshape(shape = var_1174, x = v_47_internal_tensor_assign_1_cast_fp16)[name = string("op_1175_cast_fp16")]; tensor var_1176 = const()[name = string("op_1176"), val = tensor([0, 2, 1, 3])]; bool qk_29_transpose_x_0 = const()[name = string("qk_29_transpose_x_0"), val = bool(false)]; bool qk_29_transpose_y_0 = const()[name = string("qk_29_transpose_y_0"), val = bool(false)]; tensor transpose_275_perm_0 = const()[name = string("transpose_275_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_276_perm_0 = const()[name = string("transpose_276_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_276 = transpose(perm = transpose_276_perm_0, x = k_49_cast_fp16)[name = string("transpose_602")]; tensor transpose_275 = transpose(perm = transpose_275_perm_0, x = q_39_cast_fp16)[name = string("transpose_603")]; tensor qk_29_cast_fp16 = matmul(transpose_x = qk_29_transpose_x_0, transpose_y = qk_29_transpose_y_0, x = transpose_275, y = transpose_276)[name = string("qk_29_cast_fp16")]; tensor var_1180_cast_fp16 = softmax(axis = var_1024, x = qk_29_cast_fp16)[name = string("op_1180_cast_fp16")]; bool var_1182_transpose_x_0 = const()[name = string("op_1182_transpose_x_0"), val = bool(false)]; bool var_1182_transpose_y_0 = const()[name = string("op_1182_transpose_y_0"), val = bool(false)]; tensor v_49_cast_fp16 = transpose(perm = var_1176, x = var_1175_cast_fp16)[name = string("transpose_604")]; tensor var_1182_cast_fp16 = matmul(transpose_x = var_1182_transpose_x_0, transpose_y = var_1182_transpose_y_0, x = var_1180_cast_fp16, y = v_49_cast_fp16)[name = string("op_1182_cast_fp16")]; tensor var_1183 = const()[name = string("op_1183"), val = tensor([0, 2, 1, 3])]; tensor concat_111x = const()[name = string("concat_111x"), val = tensor([1, -1, 1280])]; tensor var_1184_cast_fp16 = transpose(perm = var_1183, x = var_1182_cast_fp16)[name = string("transpose_601")]; tensor x_85_cast_fp16 = reshape(shape = concat_111x, x = var_1184_cast_fp16)[name = string("x_85_cast_fp16")]; tensor var_1188_to_fp16 = const()[name = string("op_1188_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338243520)))]; tensor var_1189_to_fp16 = const()[name = string("op_1189_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341520384)))]; tensor linear_37_cast_fp16 = linear(bias = var_1189_to_fp16, weight = var_1188_to_fp16, x = x_85_cast_fp16)[name = string("linear_37_cast_fp16")]; tensor x_87_cast_fp16 = add(x = x_81_cast_fp16, y = linear_37_cast_fp16)[name = string("x_87_cast_fp16")]; tensor var_1196_axes_0 = const()[name = string("op_1196_axes_0"), val = tensor([-1])]; tensor blocks_4_mlp_ln_weight_to_fp16 = const()[name = string("blocks_4_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341523008)))]; tensor blocks_4_mlp_ln_bias_to_fp16 = const()[name = string("blocks_4_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341525632)))]; tensor var_1196_cast_fp16 = layer_norm(axes = var_1196_axes_0, beta = blocks_4_mlp_ln_bias_to_fp16, epsilon = var_1030_to_fp16, gamma = blocks_4_mlp_ln_weight_to_fp16, x = x_87_cast_fp16)[name = string("op_1196_cast_fp16")]; tensor var_1205_to_fp16 = const()[name = string("op_1205_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341528256)))]; tensor var_1206_to_fp16 = const()[name = string("op_1206_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354635520)))]; tensor linear_38_cast_fp16 = linear(bias = var_1206_to_fp16, weight = var_1205_to_fp16, x = var_1196_cast_fp16)[name = string("linear_38_cast_fp16")]; string x_91_mode_0 = const()[name = string("x_91_mode_0"), val = string("EXACT")]; tensor x_91_cast_fp16 = gelu(mode = x_91_mode_0, x = linear_38_cast_fp16)[name = string("x_91_cast_fp16")]; tensor var_1211_to_fp16 = const()[name = string("op_1211_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354645824)))]; tensor var_1212_to_fp16 = const()[name = string("op_1212_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367753088)))]; tensor linear_39_cast_fp16 = linear(bias = var_1212_to_fp16, weight = var_1211_to_fp16, x = x_91_cast_fp16)[name = string("linear_39_cast_fp16")]; tensor x_93_cast_fp16 = add(x = x_87_cast_fp16, y = linear_39_cast_fp16)[name = string("x_93_cast_fp16")]; tensor k_cache_21_begin_0 = const()[name = string("k_cache_21_begin_0"), val = tensor([5, 0, 0, 0])]; tensor k_cache_21_end_0 = const()[name = string("k_cache_21_end_0"), val = tensor([6, 1, 448, 1280])]; tensor k_cache_21_end_mask_0 = const()[name = string("k_cache_21_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_21_squeeze_mask_0 = const()[name = string("k_cache_21_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_21_cast_fp16 = slice_by_index(begin = k_cache_21_begin_0, end = k_cache_21_end_0, end_mask = k_cache_21_end_mask_0, squeeze_mask = k_cache_21_squeeze_mask_0, x = coreml_update_state_72)[name = string("k_cache_21_cast_fp16")]; tensor v_cache_21_begin_0 = const()[name = string("v_cache_21_begin_0"), val = tensor([5, 0, 0, 0])]; tensor v_cache_21_end_0 = const()[name = string("v_cache_21_end_0"), val = tensor([6, 1, 448, 1280])]; tensor v_cache_21_end_mask_0 = const()[name = string("v_cache_21_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_21_squeeze_mask_0 = const()[name = string("v_cache_21_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_21_cast_fp16 = slice_by_index(begin = v_cache_21_begin_0, end = v_cache_21_end_0, end_mask = v_cache_21_end_mask_0, squeeze_mask = v_cache_21_squeeze_mask_0, x = coreml_update_state_73)[name = string("v_cache_21_cast_fp16")]; tensor k_cache_23_begin_0 = const()[name = string("k_cache_23_begin_0"), val = tensor([5, 0, 0, 0])]; tensor k_cache_23_end_0 = const()[name = string("k_cache_23_end_0"), val = tensor([6, 1, 1500, 1280])]; tensor k_cache_23_end_mask_0 = const()[name = string("k_cache_23_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_23_squeeze_mask_0 = const()[name = string("k_cache_23_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_23_cast_fp16 = slice_by_index(begin = k_cache_23_begin_0, end = k_cache_23_end_0, end_mask = k_cache_23_end_mask_0, squeeze_mask = k_cache_23_squeeze_mask_0, x = read_state_2)[name = string("k_cache_23_cast_fp16")]; tensor v_cache_23_begin_0 = const()[name = string("v_cache_23_begin_0"), val = tensor([5, 0, 0, 0])]; tensor v_cache_23_end_0 = const()[name = string("v_cache_23_end_0"), val = tensor([6, 1, 1500, 1280])]; tensor v_cache_23_end_mask_0 = const()[name = string("v_cache_23_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_23_squeeze_mask_0 = const()[name = string("v_cache_23_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_23_cast_fp16 = slice_by_index(begin = v_cache_23_begin_0, end = v_cache_23_end_0, end_mask = v_cache_23_end_mask_0, squeeze_mask = v_cache_23_squeeze_mask_0, x = read_state_3)[name = string("v_cache_23_cast_fp16")]; int32 var_1235 = const()[name = string("op_1235"), val = int32(-1)]; tensor var_1253_axes_0 = const()[name = string("op_1253_axes_0"), val = tensor([-1])]; tensor blocks_5_attn_ln_weight_to_fp16 = const()[name = string("blocks_5_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367755712)))]; tensor blocks_5_attn_ln_bias_to_fp16 = const()[name = string("blocks_5_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367758336)))]; fp16 var_1241_to_fp16 = const()[name = string("op_1241_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_1253_cast_fp16 = layer_norm(axes = var_1253_axes_0, beta = blocks_5_attn_ln_bias_to_fp16, epsilon = var_1241_to_fp16, gamma = blocks_5_attn_ln_weight_to_fp16, x = x_93_cast_fp16)[name = string("op_1253_cast_fp16")]; tensor var_1264_to_fp16 = const()[name = string("op_1264_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367760960)))]; tensor var_1265_to_fp16 = const()[name = string("op_1265_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371037824)))]; tensor linear_40_cast_fp16 = linear(bias = var_1265_to_fp16, weight = var_1264_to_fp16, x = var_1253_cast_fp16)[name = string("linear_40_cast_fp16")]; tensor var_1268_to_fp16 = const()[name = string("op_1268_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371040448)))]; tensor linear_41_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1268_to_fp16, x = var_1253_cast_fp16)[name = string("linear_41_cast_fp16")]; tensor var_1272_to_fp16 = const()[name = string("op_1272_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(374317312)))]; tensor var_1273_to_fp16 = const()[name = string("op_1273_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377594176)))]; tensor linear_42_cast_fp16 = linear(bias = var_1273_to_fp16, weight = var_1272_to_fp16, x = var_1253_cast_fp16)[name = string("linear_42_cast_fp16")]; tensor var_1275_shape_cast_fp16 = shape(x = linear_40_cast_fp16)[name = string("op_1275_shape_cast_fp16")]; int32 gather_62_axis_0 = const()[name = string("gather_62_axis_0"), val = int32(0)]; int32 gather_62_batch_dims_0 = const()[name = string("gather_62_batch_dims_0"), val = int32(0)]; bool gather_62_validate_indices_0 = const()[name = string("gather_62_validate_indices_0"), val = bool(false)]; string var_1275_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1275_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_62_to_uint16 = const()[name = string("select_62_to_uint16"), val = uint16(1)]; tensor var_1275_shape_cast_fp16_to_uint16 = cast(dtype = var_1275_shape_cast_fp16_to_uint16_dtype_0, x = var_1275_shape_cast_fp16)[name = string("cast_380")]; uint16 gather_62_cast_uint16 = gather(axis = gather_62_axis_0, batch_dims = gather_62_batch_dims_0, indices = select_62_to_uint16, validate_indices = gather_62_validate_indices_0, x = var_1275_shape_cast_fp16_to_uint16)[name = string("gather_62_cast_uint16")]; string gather_62_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_62_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_62_cast_uint16_to_int32 = cast(dtype = gather_62_cast_uint16_to_int32_dtype_0, x = gather_62_cast_uint16)[name = string("cast_379")]; int32 end_step_13 = add(x = offset, y = gather_62_cast_uint16_to_int32)[name = string("end_step_13")]; tensor expand_dims_80 = const()[name = string("expand_dims_80"), val = tensor([0])]; tensor expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor([0])]; tensor expand_dims_83_axes_0 = const()[name = string("expand_dims_83_axes_0"), val = tensor([0])]; tensor expand_dims_83 = expand_dims(axes = expand_dims_83_axes_0, x = end_step_13)[name = string("expand_dims_83")]; tensor concat_114_values0_0 = const()[name = string("concat_114_values0_0"), val = tensor([5])]; int32 concat_114_axis_0 = const()[name = string("concat_114_axis_0"), val = int32(0)]; bool concat_114_interleave_0 = const()[name = string("concat_114_interleave_0"), val = bool(false)]; tensor concat_114 = concat(axis = concat_114_axis_0, interleave = concat_114_interleave_0, values = (concat_114_values0_0, expand_dims_80, expand_dims_1, expand_dims_82))[name = string("concat_114")]; tensor concat_115_values0_0 = const()[name = string("concat_115_values0_0"), val = tensor([0])]; tensor concat_115_values1_0 = const()[name = string("concat_115_values1_0"), val = tensor([0])]; tensor concat_115_values3_0 = const()[name = string("concat_115_values3_0"), val = tensor([0])]; int32 concat_115_axis_0 = const()[name = string("concat_115_axis_0"), val = int32(0)]; bool concat_115_interleave_0 = const()[name = string("concat_115_interleave_0"), val = bool(false)]; tensor concat_115 = concat(axis = concat_115_axis_0, interleave = concat_115_interleave_0, values = (concat_115_values0_0, concat_115_values1_0, expand_dims_83, concat_115_values3_0))[name = string("concat_115")]; tensor k_cache1_internal_tensor_assign_6_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_6_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_6_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_114, begin_mask = k_cache1_internal_tensor_assign_6_begin_mask_0, end = concat_115, end_mask = k_cache1_internal_tensor_assign_6_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_6_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_6_stride_0, update = linear_41_cast_fp16, x = coreml_update_state_72)[name = string("k_cache1_internal_tensor_assign_6_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_6_cast_fp16, input = k_cache1)[name = string("coreml_update_state_74_write_state")]; tensor coreml_update_state_74 = read_state(input = k_cache1)[name = string("coreml_update_state_74")]; tensor v_cache1_internal_tensor_assign_6_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_6_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_6_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_114, begin_mask = v_cache1_internal_tensor_assign_6_begin_mask_0, end = concat_115, end_mask = v_cache1_internal_tensor_assign_6_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_6_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_6_stride_0, update = linear_42_cast_fp16, x = coreml_update_state_73)[name = string("v_cache1_internal_tensor_assign_6_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_6_cast_fp16, input = v_cache1)[name = string("coreml_update_state_75_write_state")]; tensor coreml_update_state_75 = read_state(input = v_cache1)[name = string("coreml_update_state_75")]; int32 concat_120_values0_0 = const()[name = string("concat_120_values0_0"), val = int32(1)]; int32 concat_120_values2_0 = const()[name = string("concat_120_values2_0"), val = int32(1280)]; int32 concat_120_axis_0 = const()[name = string("concat_120_axis_0"), val = int32(0)]; bool concat_120_interleave_0 = const()[name = string("concat_120_interleave_0"), val = bool(false)]; tensor concat_120 = concat(axis = concat_120_axis_0, interleave = concat_120_interleave_0, values = (concat_120_values0_0, end_step_13, concat_120_values2_0))[name = string("concat_120")]; tensor var_1291_begin_0 = const()[name = string("op_1291_begin_0"), val = tensor([0, 0, 0])]; tensor var_1291_end_mask_0 = const()[name = string("op_1291_end_mask_0"), val = tensor([true, false, true])]; tensor var_1291_cast_fp16 = slice_by_index(begin = var_1291_begin_0, end = concat_120, end_mask = var_1291_end_mask_0, x = k_cache_21_cast_fp16)[name = string("op_1291_cast_fp16")]; tensor var_1294_begin_0 = const()[name = string("op_1294_begin_0"), val = tensor([0, 0, 0])]; tensor var_1294_end_mask_0 = const()[name = string("op_1294_end_mask_0"), val = tensor([true, false, true])]; tensor var_1294_cast_fp16 = slice_by_index(begin = var_1294_begin_0, end = concat_120, end_mask = var_1294_end_mask_0, x = v_cache_21_cast_fp16)[name = string("op_1294_cast_fp16")]; tensor concat_122x = const()[name = string("concat_122x"), val = tensor([1, -1, 20, 64])]; tensor var_1304_cast_fp16 = reshape(shape = concat_122x, x = linear_40_cast_fp16)[name = string("op_1304_cast_fp16")]; tensor const_180_to_fp16 = const()[name = string("const_180_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_43_cast_fp16 = mul(x = var_1304_cast_fp16, y = const_180_to_fp16)[name = string("q_43_cast_fp16")]; tensor concat_123x = const()[name = string("concat_123x"), val = tensor([1, -1, 20, 64])]; tensor var_1311_cast_fp16 = reshape(shape = concat_123x, x = var_1291_cast_fp16)[name = string("op_1311_cast_fp16")]; tensor const_181_to_fp16 = const()[name = string("const_181_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_55_cast_fp16 = mul(x = var_1311_cast_fp16, y = const_181_to_fp16)[name = string("k_55_cast_fp16")]; tensor concat_124x = const()[name = string("concat_124x"), val = tensor([1, -1, 20, 64])]; tensor var_1318_cast_fp16 = reshape(shape = concat_124x, x = var_1294_cast_fp16)[name = string("op_1318_cast_fp16")]; tensor var_1319 = const()[name = string("op_1319"), val = tensor([0, 2, 1, 3])]; bool qk_31_transpose_x_0 = const()[name = string("qk_31_transpose_x_0"), val = bool(false)]; bool qk_31_transpose_y_0 = const()[name = string("qk_31_transpose_y_0"), val = bool(false)]; tensor transpose_277_perm_0 = const()[name = string("transpose_277_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_278_perm_0 = const()[name = string("transpose_278_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_278 = transpose(perm = transpose_278_perm_0, x = k_55_cast_fp16)[name = string("transpose_598")]; tensor transpose_277 = transpose(perm = transpose_277_perm_0, x = q_43_cast_fp16)[name = string("transpose_599")]; tensor qk_31_cast_fp16 = matmul(transpose_x = qk_31_transpose_x_0, transpose_y = qk_31_transpose_y_0, x = transpose_277, y = transpose_278)[name = string("qk_31_cast_fp16")]; int32 concat_125_values1_0 = const()[name = string("concat_125_values1_0"), val = int32(448)]; int32 concat_125_axis_0 = const()[name = string("concat_125_axis_0"), val = int32(0)]; bool concat_125_interleave_0 = const()[name = string("concat_125_interleave_0"), val = bool(false)]; tensor concat_125 = concat(axis = concat_125_axis_0, interleave = concat_125_interleave_0, values = (gather_62_cast_uint16_to_int32, concat_125_values1_0))[name = string("concat_125")]; tensor var_1322_begin_0 = const()[name = string("op_1322_begin_0"), val = tensor([0, 0])]; tensor var_1322_end_mask_0 = const()[name = string("op_1322_end_mask_0"), val = tensor([false, true])]; tensor var_1322_cast_fp16 = slice_by_index(begin = var_1322_begin_0, end = concat_125, end_mask = var_1322_end_mask_0, x = mask_to_fp16)[name = string("op_1322_cast_fp16")]; int32 concat_126_values0_0 = const()[name = string("concat_126_values0_0"), val = int32(0)]; int32 concat_126_axis_0 = const()[name = string("concat_126_axis_0"), val = int32(0)]; bool concat_126_interleave_0 = const()[name = string("concat_126_interleave_0"), val = bool(false)]; tensor concat_126 = concat(axis = concat_126_axis_0, interleave = concat_126_interleave_0, values = (concat_126_values0_0, gather_62_cast_uint16_to_int32))[name = string("concat_126")]; tensor var_1323_begin_0 = const()[name = string("op_1323_begin_0"), val = tensor([0, 0])]; tensor var_1323_end_mask_0 = const()[name = string("op_1323_end_mask_0"), val = tensor([true, false])]; tensor var_1323_cast_fp16 = slice_by_index(begin = var_1323_begin_0, end = concat_126, end_mask = var_1323_end_mask_0, x = var_1322_cast_fp16)[name = string("op_1323_cast_fp16")]; tensor qk_33_cast_fp16 = add(x = qk_31_cast_fp16, y = var_1323_cast_fp16)[name = string("qk_33_cast_fp16")]; tensor var_1326_cast_fp16 = softmax(axis = var_1235, x = qk_33_cast_fp16)[name = string("op_1326_cast_fp16")]; bool var_1328_transpose_x_0 = const()[name = string("op_1328_transpose_x_0"), val = bool(false)]; bool var_1328_transpose_y_0 = const()[name = string("op_1328_transpose_y_0"), val = bool(false)]; tensor v_55_cast_fp16 = transpose(perm = var_1319, x = var_1318_cast_fp16)[name = string("transpose_600")]; tensor var_1328_cast_fp16 = matmul(transpose_x = var_1328_transpose_x_0, transpose_y = var_1328_transpose_y_0, x = var_1326_cast_fp16, y = v_55_cast_fp16)[name = string("op_1328_cast_fp16")]; tensor var_1329 = const()[name = string("op_1329"), val = tensor([0, 2, 1, 3])]; tensor concat_127x = const()[name = string("concat_127x"), val = tensor([1, -1, 1280])]; tensor var_1330_cast_fp16 = transpose(perm = var_1329, x = var_1328_cast_fp16)[name = string("transpose_597")]; tensor x_97_cast_fp16 = reshape(shape = concat_127x, x = var_1330_cast_fp16)[name = string("x_97_cast_fp16")]; tensor var_1334_to_fp16 = const()[name = string("op_1334_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377596800)))]; tensor var_1335_to_fp16 = const()[name = string("op_1335_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380873664)))]; tensor linear_43_cast_fp16 = linear(bias = var_1335_to_fp16, weight = var_1334_to_fp16, x = x_97_cast_fp16)[name = string("linear_43_cast_fp16")]; tensor x_99_cast_fp16 = add(x = x_93_cast_fp16, y = linear_43_cast_fp16)[name = string("x_99_cast_fp16")]; tensor var_1342_axes_0 = const()[name = string("op_1342_axes_0"), val = tensor([-1])]; tensor blocks_5_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_5_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380876288)))]; tensor blocks_5_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_5_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380878912)))]; tensor var_1342_cast_fp16 = layer_norm(axes = var_1342_axes_0, beta = blocks_5_cross_attn_ln_bias_to_fp16, epsilon = var_1241_to_fp16, gamma = blocks_5_cross_attn_ln_weight_to_fp16, x = x_99_cast_fp16)[name = string("op_1342_cast_fp16")]; tensor var_1351_to_fp16 = const()[name = string("op_1351_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380881536)))]; tensor var_1352_to_fp16 = const()[name = string("op_1352_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384158400)))]; tensor linear_44_cast_fp16 = linear(bias = var_1352_to_fp16, weight = var_1351_to_fp16, x = var_1342_cast_fp16)[name = string("linear_44_cast_fp16")]; tensor concat_128 = const()[name = string("concat_128"), val = tensor([0, 0, 0])]; tensor concat_129 = const()[name = string("concat_129"), val = tensor([0, 1500, 0])]; tensor k_57_internal_tensor_assign_1_stride_0 = const()[name = string("k_57_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_57_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_57_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_57_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_57_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_57_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_57_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_57_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_128, begin_mask = k_57_internal_tensor_assign_1_begin_mask_0, end = concat_129, end_mask = k_57_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_57_internal_tensor_assign_1_squeeze_mask_0, stride = k_57_internal_tensor_assign_1_stride_0, update = k_cache_23_cast_fp16, x = k_7_to_fp16)[name = string("k_57_internal_tensor_assign_1_cast_fp16")]; tensor concat_130 = const()[name = string("concat_130"), val = tensor([0, 0, 0])]; tensor concat_131 = const()[name = string("concat_131"), val = tensor([0, 1500, 0])]; tensor v_57_internal_tensor_assign_1_stride_0 = const()[name = string("v_57_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_57_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_57_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_57_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_57_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_57_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_57_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_57_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_130, begin_mask = v_57_internal_tensor_assign_1_begin_mask_0, end = concat_131, end_mask = v_57_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_57_internal_tensor_assign_1_squeeze_mask_0, stride = v_57_internal_tensor_assign_1_stride_0, update = v_cache_23_cast_fp16, x = k_7_to_fp16)[name = string("v_57_internal_tensor_assign_1_cast_fp16")]; tensor concat_132x = const()[name = string("concat_132x"), val = tensor([1, -1, 20, 64])]; tensor var_1372_cast_fp16 = reshape(shape = concat_132x, x = linear_44_cast_fp16)[name = string("op_1372_cast_fp16")]; tensor const_182_to_fp16 = const()[name = string("const_182_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_47_cast_fp16 = mul(x = var_1372_cast_fp16, y = const_182_to_fp16)[name = string("q_47_cast_fp16")]; tensor var_1378 = const()[name = string("op_1378"), val = tensor([1, 1500, 20, -1])]; tensor var_1379_cast_fp16 = reshape(shape = var_1378, x = k_57_internal_tensor_assign_1_cast_fp16)[name = string("op_1379_cast_fp16")]; tensor const_183_to_fp16 = const()[name = string("const_183_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_59_cast_fp16 = mul(x = var_1379_cast_fp16, y = const_183_to_fp16)[name = string("k_59_cast_fp16")]; tensor var_1385 = const()[name = string("op_1385"), val = tensor([1, 1500, 20, -1])]; tensor var_1386_cast_fp16 = reshape(shape = var_1385, x = v_57_internal_tensor_assign_1_cast_fp16)[name = string("op_1386_cast_fp16")]; tensor var_1387 = const()[name = string("op_1387"), val = tensor([0, 2, 1, 3])]; bool qk_35_transpose_x_0 = const()[name = string("qk_35_transpose_x_0"), val = bool(false)]; bool qk_35_transpose_y_0 = const()[name = string("qk_35_transpose_y_0"), val = bool(false)]; tensor transpose_279_perm_0 = const()[name = string("transpose_279_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_280_perm_0 = const()[name = string("transpose_280_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_280 = transpose(perm = transpose_280_perm_0, x = k_59_cast_fp16)[name = string("transpose_594")]; tensor transpose_279 = transpose(perm = transpose_279_perm_0, x = q_47_cast_fp16)[name = string("transpose_595")]; tensor qk_35_cast_fp16 = matmul(transpose_x = qk_35_transpose_x_0, transpose_y = qk_35_transpose_y_0, x = transpose_279, y = transpose_280)[name = string("qk_35_cast_fp16")]; tensor var_1391_cast_fp16 = softmax(axis = var_1235, x = qk_35_cast_fp16)[name = string("op_1391_cast_fp16")]; bool var_1393_transpose_x_0 = const()[name = string("op_1393_transpose_x_0"), val = bool(false)]; bool var_1393_transpose_y_0 = const()[name = string("op_1393_transpose_y_0"), val = bool(false)]; tensor v_59_cast_fp16 = transpose(perm = var_1387, x = var_1386_cast_fp16)[name = string("transpose_596")]; tensor var_1393_cast_fp16 = matmul(transpose_x = var_1393_transpose_x_0, transpose_y = var_1393_transpose_y_0, x = var_1391_cast_fp16, y = v_59_cast_fp16)[name = string("op_1393_cast_fp16")]; tensor var_1394 = const()[name = string("op_1394"), val = tensor([0, 2, 1, 3])]; tensor concat_133x = const()[name = string("concat_133x"), val = tensor([1, -1, 1280])]; tensor var_1395_cast_fp16 = transpose(perm = var_1394, x = var_1393_cast_fp16)[name = string("transpose_593")]; tensor x_103_cast_fp16 = reshape(shape = concat_133x, x = var_1395_cast_fp16)[name = string("x_103_cast_fp16")]; tensor var_1399_to_fp16 = const()[name = string("op_1399_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384161024)))]; tensor var_1400_to_fp16 = const()[name = string("op_1400_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387437888)))]; tensor linear_45_cast_fp16 = linear(bias = var_1400_to_fp16, weight = var_1399_to_fp16, x = x_103_cast_fp16)[name = string("linear_45_cast_fp16")]; tensor x_105_cast_fp16 = add(x = x_99_cast_fp16, y = linear_45_cast_fp16)[name = string("x_105_cast_fp16")]; tensor var_1407_axes_0 = const()[name = string("op_1407_axes_0"), val = tensor([-1])]; tensor blocks_5_mlp_ln_weight_to_fp16 = const()[name = string("blocks_5_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387440512)))]; tensor blocks_5_mlp_ln_bias_to_fp16 = const()[name = string("blocks_5_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387443136)))]; tensor var_1407_cast_fp16 = layer_norm(axes = var_1407_axes_0, beta = blocks_5_mlp_ln_bias_to_fp16, epsilon = var_1241_to_fp16, gamma = blocks_5_mlp_ln_weight_to_fp16, x = x_105_cast_fp16)[name = string("op_1407_cast_fp16")]; tensor var_1416_to_fp16 = const()[name = string("op_1416_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387445760)))]; tensor var_1417_to_fp16 = const()[name = string("op_1417_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400553024)))]; tensor linear_46_cast_fp16 = linear(bias = var_1417_to_fp16, weight = var_1416_to_fp16, x = var_1407_cast_fp16)[name = string("linear_46_cast_fp16")]; string x_109_mode_0 = const()[name = string("x_109_mode_0"), val = string("EXACT")]; tensor x_109_cast_fp16 = gelu(mode = x_109_mode_0, x = linear_46_cast_fp16)[name = string("x_109_cast_fp16")]; tensor var_1422_to_fp16 = const()[name = string("op_1422_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400563328)))]; tensor var_1423_to_fp16 = const()[name = string("op_1423_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413670592)))]; tensor linear_47_cast_fp16 = linear(bias = var_1423_to_fp16, weight = var_1422_to_fp16, x = x_109_cast_fp16)[name = string("linear_47_cast_fp16")]; tensor x_111_cast_fp16 = add(x = x_105_cast_fp16, y = linear_47_cast_fp16)[name = string("x_111_cast_fp16")]; tensor k_cache_25_begin_0 = const()[name = string("k_cache_25_begin_0"), val = tensor([6, 0, 0, 0])]; tensor k_cache_25_end_0 = const()[name = string("k_cache_25_end_0"), val = tensor([7, 1, 448, 1280])]; tensor k_cache_25_end_mask_0 = const()[name = string("k_cache_25_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_25_squeeze_mask_0 = const()[name = string("k_cache_25_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_25_cast_fp16 = slice_by_index(begin = k_cache_25_begin_0, end = k_cache_25_end_0, end_mask = k_cache_25_end_mask_0, squeeze_mask = k_cache_25_squeeze_mask_0, x = coreml_update_state_74)[name = string("k_cache_25_cast_fp16")]; tensor v_cache_25_begin_0 = const()[name = string("v_cache_25_begin_0"), val = tensor([6, 0, 0, 0])]; tensor v_cache_25_end_0 = const()[name = string("v_cache_25_end_0"), val = tensor([7, 1, 448, 1280])]; tensor v_cache_25_end_mask_0 = const()[name = string("v_cache_25_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_25_squeeze_mask_0 = const()[name = string("v_cache_25_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_25_cast_fp16 = slice_by_index(begin = v_cache_25_begin_0, end = v_cache_25_end_0, end_mask = v_cache_25_end_mask_0, squeeze_mask = v_cache_25_squeeze_mask_0, x = coreml_update_state_75)[name = string("v_cache_25_cast_fp16")]; tensor k_cache_27_begin_0 = const()[name = string("k_cache_27_begin_0"), val = tensor([6, 0, 0, 0])]; tensor k_cache_27_end_0 = const()[name = string("k_cache_27_end_0"), val = tensor([7, 1, 1500, 1280])]; tensor k_cache_27_end_mask_0 = const()[name = string("k_cache_27_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_27_squeeze_mask_0 = const()[name = string("k_cache_27_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_27_cast_fp16 = slice_by_index(begin = k_cache_27_begin_0, end = k_cache_27_end_0, end_mask = k_cache_27_end_mask_0, squeeze_mask = k_cache_27_squeeze_mask_0, x = read_state_2)[name = string("k_cache_27_cast_fp16")]; tensor v_cache_27_begin_0 = const()[name = string("v_cache_27_begin_0"), val = tensor([6, 0, 0, 0])]; tensor v_cache_27_end_0 = const()[name = string("v_cache_27_end_0"), val = tensor([7, 1, 1500, 1280])]; tensor v_cache_27_end_mask_0 = const()[name = string("v_cache_27_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_27_squeeze_mask_0 = const()[name = string("v_cache_27_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_27_cast_fp16 = slice_by_index(begin = v_cache_27_begin_0, end = v_cache_27_end_0, end_mask = v_cache_27_end_mask_0, squeeze_mask = v_cache_27_squeeze_mask_0, x = read_state_3)[name = string("v_cache_27_cast_fp16")]; int32 var_1446 = const()[name = string("op_1446"), val = int32(-1)]; tensor var_1464_axes_0 = const()[name = string("op_1464_axes_0"), val = tensor([-1])]; tensor blocks_6_attn_ln_weight_to_fp16 = const()[name = string("blocks_6_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413673216)))]; tensor blocks_6_attn_ln_bias_to_fp16 = const()[name = string("blocks_6_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413675840)))]; fp16 var_1452_to_fp16 = const()[name = string("op_1452_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_1464_cast_fp16 = layer_norm(axes = var_1464_axes_0, beta = blocks_6_attn_ln_bias_to_fp16, epsilon = var_1452_to_fp16, gamma = blocks_6_attn_ln_weight_to_fp16, x = x_111_cast_fp16)[name = string("op_1464_cast_fp16")]; tensor var_1475_to_fp16 = const()[name = string("op_1475_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413678464)))]; tensor var_1476_to_fp16 = const()[name = string("op_1476_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(416955328)))]; tensor linear_48_cast_fp16 = linear(bias = var_1476_to_fp16, weight = var_1475_to_fp16, x = var_1464_cast_fp16)[name = string("linear_48_cast_fp16")]; tensor var_1479_to_fp16 = const()[name = string("op_1479_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(416957952)))]; tensor linear_49_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1479_to_fp16, x = var_1464_cast_fp16)[name = string("linear_49_cast_fp16")]; tensor var_1483_to_fp16 = const()[name = string("op_1483_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(420234816)))]; tensor var_1484_to_fp16 = const()[name = string("op_1484_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(423511680)))]; tensor linear_50_cast_fp16 = linear(bias = var_1484_to_fp16, weight = var_1483_to_fp16, x = var_1464_cast_fp16)[name = string("linear_50_cast_fp16")]; tensor var_1486_shape_cast_fp16 = shape(x = linear_48_cast_fp16)[name = string("op_1486_shape_cast_fp16")]; int32 gather_74_axis_0 = const()[name = string("gather_74_axis_0"), val = int32(0)]; int32 gather_74_batch_dims_0 = const()[name = string("gather_74_batch_dims_0"), val = int32(0)]; bool gather_74_validate_indices_0 = const()[name = string("gather_74_validate_indices_0"), val = bool(false)]; string var_1486_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1486_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_74_to_uint16 = const()[name = string("select_74_to_uint16"), val = uint16(1)]; tensor var_1486_shape_cast_fp16_to_uint16 = cast(dtype = var_1486_shape_cast_fp16_to_uint16_dtype_0, x = var_1486_shape_cast_fp16)[name = string("cast_378")]; uint16 gather_74_cast_uint16 = gather(axis = gather_74_axis_0, batch_dims = gather_74_batch_dims_0, indices = select_74_to_uint16, validate_indices = gather_74_validate_indices_0, x = var_1486_shape_cast_fp16_to_uint16)[name = string("gather_74_cast_uint16")]; string gather_74_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_74_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_74_cast_uint16_to_int32 = cast(dtype = gather_74_cast_uint16_to_int32_dtype_0, x = gather_74_cast_uint16)[name = string("cast_377")]; int32 end_step_15 = add(x = offset, y = gather_74_cast_uint16_to_int32)[name = string("end_step_15")]; tensor expand_dims_96 = const()[name = string("expand_dims_96"), val = tensor([0])]; tensor expand_dims_98 = const()[name = string("expand_dims_98"), val = tensor([0])]; tensor expand_dims_99_axes_0 = const()[name = string("expand_dims_99_axes_0"), val = tensor([0])]; tensor expand_dims_99 = expand_dims(axes = expand_dims_99_axes_0, x = end_step_15)[name = string("expand_dims_99")]; tensor concat_136_values0_0 = const()[name = string("concat_136_values0_0"), val = tensor([6])]; int32 concat_136_axis_0 = const()[name = string("concat_136_axis_0"), val = int32(0)]; bool concat_136_interleave_0 = const()[name = string("concat_136_interleave_0"), val = bool(false)]; tensor concat_136 = concat(axis = concat_136_axis_0, interleave = concat_136_interleave_0, values = (concat_136_values0_0, expand_dims_96, expand_dims_1, expand_dims_98))[name = string("concat_136")]; tensor concat_137_values0_0 = const()[name = string("concat_137_values0_0"), val = tensor([0])]; tensor concat_137_values1_0 = const()[name = string("concat_137_values1_0"), val = tensor([0])]; tensor concat_137_values3_0 = const()[name = string("concat_137_values3_0"), val = tensor([0])]; int32 concat_137_axis_0 = const()[name = string("concat_137_axis_0"), val = int32(0)]; bool concat_137_interleave_0 = const()[name = string("concat_137_interleave_0"), val = bool(false)]; tensor concat_137 = concat(axis = concat_137_axis_0, interleave = concat_137_interleave_0, values = (concat_137_values0_0, concat_137_values1_0, expand_dims_99, concat_137_values3_0))[name = string("concat_137")]; tensor k_cache1_internal_tensor_assign_7_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_7_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_7_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_136, begin_mask = k_cache1_internal_tensor_assign_7_begin_mask_0, end = concat_137, end_mask = k_cache1_internal_tensor_assign_7_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_7_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_7_stride_0, update = linear_49_cast_fp16, x = coreml_update_state_74)[name = string("k_cache1_internal_tensor_assign_7_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_7_cast_fp16, input = k_cache1)[name = string("coreml_update_state_76_write_state")]; tensor coreml_update_state_76 = read_state(input = k_cache1)[name = string("coreml_update_state_76")]; tensor v_cache1_internal_tensor_assign_7_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_7_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_7_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_136, begin_mask = v_cache1_internal_tensor_assign_7_begin_mask_0, end = concat_137, end_mask = v_cache1_internal_tensor_assign_7_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_7_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_7_stride_0, update = linear_50_cast_fp16, x = coreml_update_state_75)[name = string("v_cache1_internal_tensor_assign_7_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_7_cast_fp16, input = v_cache1)[name = string("coreml_update_state_77_write_state")]; tensor coreml_update_state_77 = read_state(input = v_cache1)[name = string("coreml_update_state_77")]; int32 concat_142_values0_0 = const()[name = string("concat_142_values0_0"), val = int32(1)]; int32 concat_142_values2_0 = const()[name = string("concat_142_values2_0"), val = int32(1280)]; int32 concat_142_axis_0 = const()[name = string("concat_142_axis_0"), val = int32(0)]; bool concat_142_interleave_0 = const()[name = string("concat_142_interleave_0"), val = bool(false)]; tensor concat_142 = concat(axis = concat_142_axis_0, interleave = concat_142_interleave_0, values = (concat_142_values0_0, end_step_15, concat_142_values2_0))[name = string("concat_142")]; tensor var_1502_begin_0 = const()[name = string("op_1502_begin_0"), val = tensor([0, 0, 0])]; tensor var_1502_end_mask_0 = const()[name = string("op_1502_end_mask_0"), val = tensor([true, false, true])]; tensor var_1502_cast_fp16 = slice_by_index(begin = var_1502_begin_0, end = concat_142, end_mask = var_1502_end_mask_0, x = k_cache_25_cast_fp16)[name = string("op_1502_cast_fp16")]; tensor var_1505_begin_0 = const()[name = string("op_1505_begin_0"), val = tensor([0, 0, 0])]; tensor var_1505_end_mask_0 = const()[name = string("op_1505_end_mask_0"), val = tensor([true, false, true])]; tensor var_1505_cast_fp16 = slice_by_index(begin = var_1505_begin_0, end = concat_142, end_mask = var_1505_end_mask_0, x = v_cache_25_cast_fp16)[name = string("op_1505_cast_fp16")]; tensor concat_144x = const()[name = string("concat_144x"), val = tensor([1, -1, 20, 64])]; tensor var_1515_cast_fp16 = reshape(shape = concat_144x, x = linear_48_cast_fp16)[name = string("op_1515_cast_fp16")]; tensor const_184_to_fp16 = const()[name = string("const_184_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_51_cast_fp16 = mul(x = var_1515_cast_fp16, y = const_184_to_fp16)[name = string("q_51_cast_fp16")]; tensor concat_145x = const()[name = string("concat_145x"), val = tensor([1, -1, 20, 64])]; tensor var_1522_cast_fp16 = reshape(shape = concat_145x, x = var_1502_cast_fp16)[name = string("op_1522_cast_fp16")]; tensor const_185_to_fp16 = const()[name = string("const_185_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_65_cast_fp16 = mul(x = var_1522_cast_fp16, y = const_185_to_fp16)[name = string("k_65_cast_fp16")]; tensor concat_146x = const()[name = string("concat_146x"), val = tensor([1, -1, 20, 64])]; tensor var_1529_cast_fp16 = reshape(shape = concat_146x, x = var_1505_cast_fp16)[name = string("op_1529_cast_fp16")]; tensor var_1530 = const()[name = string("op_1530"), val = tensor([0, 2, 1, 3])]; bool qk_37_transpose_x_0 = const()[name = string("qk_37_transpose_x_0"), val = bool(false)]; bool qk_37_transpose_y_0 = const()[name = string("qk_37_transpose_y_0"), val = bool(false)]; tensor transpose_281_perm_0 = const()[name = string("transpose_281_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_282_perm_0 = const()[name = string("transpose_282_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_282 = transpose(perm = transpose_282_perm_0, x = k_65_cast_fp16)[name = string("transpose_590")]; tensor transpose_281 = transpose(perm = transpose_281_perm_0, x = q_51_cast_fp16)[name = string("transpose_591")]; tensor qk_37_cast_fp16 = matmul(transpose_x = qk_37_transpose_x_0, transpose_y = qk_37_transpose_y_0, x = transpose_281, y = transpose_282)[name = string("qk_37_cast_fp16")]; int32 concat_147_values1_0 = const()[name = string("concat_147_values1_0"), val = int32(448)]; int32 concat_147_axis_0 = const()[name = string("concat_147_axis_0"), val = int32(0)]; bool concat_147_interleave_0 = const()[name = string("concat_147_interleave_0"), val = bool(false)]; tensor concat_147 = concat(axis = concat_147_axis_0, interleave = concat_147_interleave_0, values = (gather_74_cast_uint16_to_int32, concat_147_values1_0))[name = string("concat_147")]; tensor var_1533_begin_0 = const()[name = string("op_1533_begin_0"), val = tensor([0, 0])]; tensor var_1533_end_mask_0 = const()[name = string("op_1533_end_mask_0"), val = tensor([false, true])]; tensor var_1533_cast_fp16 = slice_by_index(begin = var_1533_begin_0, end = concat_147, end_mask = var_1533_end_mask_0, x = mask_to_fp16)[name = string("op_1533_cast_fp16")]; int32 concat_148_values0_0 = const()[name = string("concat_148_values0_0"), val = int32(0)]; int32 concat_148_axis_0 = const()[name = string("concat_148_axis_0"), val = int32(0)]; bool concat_148_interleave_0 = const()[name = string("concat_148_interleave_0"), val = bool(false)]; tensor concat_148 = concat(axis = concat_148_axis_0, interleave = concat_148_interleave_0, values = (concat_148_values0_0, gather_74_cast_uint16_to_int32))[name = string("concat_148")]; tensor var_1534_begin_0 = const()[name = string("op_1534_begin_0"), val = tensor([0, 0])]; tensor var_1534_end_mask_0 = const()[name = string("op_1534_end_mask_0"), val = tensor([true, false])]; tensor var_1534_cast_fp16 = slice_by_index(begin = var_1534_begin_0, end = concat_148, end_mask = var_1534_end_mask_0, x = var_1533_cast_fp16)[name = string("op_1534_cast_fp16")]; tensor qk_39_cast_fp16 = add(x = qk_37_cast_fp16, y = var_1534_cast_fp16)[name = string("qk_39_cast_fp16")]; tensor var_1537_cast_fp16 = softmax(axis = var_1446, x = qk_39_cast_fp16)[name = string("op_1537_cast_fp16")]; bool var_1539_transpose_x_0 = const()[name = string("op_1539_transpose_x_0"), val = bool(false)]; bool var_1539_transpose_y_0 = const()[name = string("op_1539_transpose_y_0"), val = bool(false)]; tensor v_65_cast_fp16 = transpose(perm = var_1530, x = var_1529_cast_fp16)[name = string("transpose_592")]; tensor var_1539_cast_fp16 = matmul(transpose_x = var_1539_transpose_x_0, transpose_y = var_1539_transpose_y_0, x = var_1537_cast_fp16, y = v_65_cast_fp16)[name = string("op_1539_cast_fp16")]; tensor var_1540 = const()[name = string("op_1540"), val = tensor([0, 2, 1, 3])]; tensor concat_149x = const()[name = string("concat_149x"), val = tensor([1, -1, 1280])]; tensor var_1541_cast_fp16 = transpose(perm = var_1540, x = var_1539_cast_fp16)[name = string("transpose_589")]; tensor x_115_cast_fp16 = reshape(shape = concat_149x, x = var_1541_cast_fp16)[name = string("x_115_cast_fp16")]; tensor var_1545_to_fp16 = const()[name = string("op_1545_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(423514304)))]; tensor var_1546_to_fp16 = const()[name = string("op_1546_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(426791168)))]; tensor linear_51_cast_fp16 = linear(bias = var_1546_to_fp16, weight = var_1545_to_fp16, x = x_115_cast_fp16)[name = string("linear_51_cast_fp16")]; tensor x_117_cast_fp16 = add(x = x_111_cast_fp16, y = linear_51_cast_fp16)[name = string("x_117_cast_fp16")]; tensor var_1553_axes_0 = const()[name = string("op_1553_axes_0"), val = tensor([-1])]; tensor blocks_6_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_6_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(426793792)))]; tensor blocks_6_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_6_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(426796416)))]; tensor var_1553_cast_fp16 = layer_norm(axes = var_1553_axes_0, beta = blocks_6_cross_attn_ln_bias_to_fp16, epsilon = var_1452_to_fp16, gamma = blocks_6_cross_attn_ln_weight_to_fp16, x = x_117_cast_fp16)[name = string("op_1553_cast_fp16")]; tensor var_1562_to_fp16 = const()[name = string("op_1562_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(426799040)))]; tensor var_1563_to_fp16 = const()[name = string("op_1563_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(430075904)))]; tensor linear_52_cast_fp16 = linear(bias = var_1563_to_fp16, weight = var_1562_to_fp16, x = var_1553_cast_fp16)[name = string("linear_52_cast_fp16")]; tensor concat_150 = const()[name = string("concat_150"), val = tensor([0, 0, 0])]; tensor concat_151 = const()[name = string("concat_151"), val = tensor([0, 1500, 0])]; tensor k_67_internal_tensor_assign_1_stride_0 = const()[name = string("k_67_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_67_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_67_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_67_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_67_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_67_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_67_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_67_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_150, begin_mask = k_67_internal_tensor_assign_1_begin_mask_0, end = concat_151, end_mask = k_67_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_67_internal_tensor_assign_1_squeeze_mask_0, stride = k_67_internal_tensor_assign_1_stride_0, update = k_cache_27_cast_fp16, x = k_7_to_fp16)[name = string("k_67_internal_tensor_assign_1_cast_fp16")]; tensor concat_152 = const()[name = string("concat_152"), val = tensor([0, 0, 0])]; tensor concat_153 = const()[name = string("concat_153"), val = tensor([0, 1500, 0])]; tensor v_67_internal_tensor_assign_1_stride_0 = const()[name = string("v_67_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_67_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_67_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_67_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_67_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_67_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_67_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_67_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_152, begin_mask = v_67_internal_tensor_assign_1_begin_mask_0, end = concat_153, end_mask = v_67_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_67_internal_tensor_assign_1_squeeze_mask_0, stride = v_67_internal_tensor_assign_1_stride_0, update = v_cache_27_cast_fp16, x = k_7_to_fp16)[name = string("v_67_internal_tensor_assign_1_cast_fp16")]; tensor concat_154x = const()[name = string("concat_154x"), val = tensor([1, -1, 20, 64])]; tensor var_1583_cast_fp16 = reshape(shape = concat_154x, x = linear_52_cast_fp16)[name = string("op_1583_cast_fp16")]; tensor const_186_to_fp16 = const()[name = string("const_186_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_55_cast_fp16 = mul(x = var_1583_cast_fp16, y = const_186_to_fp16)[name = string("q_55_cast_fp16")]; tensor var_1589 = const()[name = string("op_1589"), val = tensor([1, 1500, 20, -1])]; tensor var_1590_cast_fp16 = reshape(shape = var_1589, x = k_67_internal_tensor_assign_1_cast_fp16)[name = string("op_1590_cast_fp16")]; tensor const_187_to_fp16 = const()[name = string("const_187_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_69_cast_fp16 = mul(x = var_1590_cast_fp16, y = const_187_to_fp16)[name = string("k_69_cast_fp16")]; tensor var_1596 = const()[name = string("op_1596"), val = tensor([1, 1500, 20, -1])]; tensor var_1597_cast_fp16 = reshape(shape = var_1596, x = v_67_internal_tensor_assign_1_cast_fp16)[name = string("op_1597_cast_fp16")]; tensor var_1598 = const()[name = string("op_1598"), val = tensor([0, 2, 1, 3])]; bool qk_41_transpose_x_0 = const()[name = string("qk_41_transpose_x_0"), val = bool(false)]; bool qk_41_transpose_y_0 = const()[name = string("qk_41_transpose_y_0"), val = bool(false)]; tensor transpose_283_perm_0 = const()[name = string("transpose_283_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_284_perm_0 = const()[name = string("transpose_284_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_284 = transpose(perm = transpose_284_perm_0, x = k_69_cast_fp16)[name = string("transpose_586")]; tensor transpose_283 = transpose(perm = transpose_283_perm_0, x = q_55_cast_fp16)[name = string("transpose_587")]; tensor qk_41_cast_fp16 = matmul(transpose_x = qk_41_transpose_x_0, transpose_y = qk_41_transpose_y_0, x = transpose_283, y = transpose_284)[name = string("qk_41_cast_fp16")]; tensor var_1602_cast_fp16 = softmax(axis = var_1446, x = qk_41_cast_fp16)[name = string("op_1602_cast_fp16")]; bool var_1604_transpose_x_0 = const()[name = string("op_1604_transpose_x_0"), val = bool(false)]; bool var_1604_transpose_y_0 = const()[name = string("op_1604_transpose_y_0"), val = bool(false)]; tensor v_69_cast_fp16 = transpose(perm = var_1598, x = var_1597_cast_fp16)[name = string("transpose_588")]; tensor var_1604_cast_fp16 = matmul(transpose_x = var_1604_transpose_x_0, transpose_y = var_1604_transpose_y_0, x = var_1602_cast_fp16, y = v_69_cast_fp16)[name = string("op_1604_cast_fp16")]; tensor var_1605 = const()[name = string("op_1605"), val = tensor([0, 2, 1, 3])]; tensor concat_155x = const()[name = string("concat_155x"), val = tensor([1, -1, 1280])]; tensor var_1606_cast_fp16 = transpose(perm = var_1605, x = var_1604_cast_fp16)[name = string("transpose_585")]; tensor x_121_cast_fp16 = reshape(shape = concat_155x, x = var_1606_cast_fp16)[name = string("x_121_cast_fp16")]; tensor var_1610_to_fp16 = const()[name = string("op_1610_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(430078528)))]; tensor var_1611_to_fp16 = const()[name = string("op_1611_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433355392)))]; tensor linear_53_cast_fp16 = linear(bias = var_1611_to_fp16, weight = var_1610_to_fp16, x = x_121_cast_fp16)[name = string("linear_53_cast_fp16")]; tensor x_123_cast_fp16 = add(x = x_117_cast_fp16, y = linear_53_cast_fp16)[name = string("x_123_cast_fp16")]; tensor var_1618_axes_0 = const()[name = string("op_1618_axes_0"), val = tensor([-1])]; tensor blocks_6_mlp_ln_weight_to_fp16 = const()[name = string("blocks_6_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433358016)))]; tensor blocks_6_mlp_ln_bias_to_fp16 = const()[name = string("blocks_6_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433360640)))]; tensor var_1618_cast_fp16 = layer_norm(axes = var_1618_axes_0, beta = blocks_6_mlp_ln_bias_to_fp16, epsilon = var_1452_to_fp16, gamma = blocks_6_mlp_ln_weight_to_fp16, x = x_123_cast_fp16)[name = string("op_1618_cast_fp16")]; tensor var_1627_to_fp16 = const()[name = string("op_1627_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433363264)))]; tensor var_1628_to_fp16 = const()[name = string("op_1628_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(446470528)))]; tensor linear_54_cast_fp16 = linear(bias = var_1628_to_fp16, weight = var_1627_to_fp16, x = var_1618_cast_fp16)[name = string("linear_54_cast_fp16")]; string x_127_mode_0 = const()[name = string("x_127_mode_0"), val = string("EXACT")]; tensor x_127_cast_fp16 = gelu(mode = x_127_mode_0, x = linear_54_cast_fp16)[name = string("x_127_cast_fp16")]; tensor var_1633_to_fp16 = const()[name = string("op_1633_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(446480832)))]; tensor var_1634_to_fp16 = const()[name = string("op_1634_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(459588096)))]; tensor linear_55_cast_fp16 = linear(bias = var_1634_to_fp16, weight = var_1633_to_fp16, x = x_127_cast_fp16)[name = string("linear_55_cast_fp16")]; tensor x_129_cast_fp16 = add(x = x_123_cast_fp16, y = linear_55_cast_fp16)[name = string("x_129_cast_fp16")]; tensor k_cache_29_begin_0 = const()[name = string("k_cache_29_begin_0"), val = tensor([7, 0, 0, 0])]; tensor k_cache_29_end_0 = const()[name = string("k_cache_29_end_0"), val = tensor([8, 1, 448, 1280])]; tensor k_cache_29_end_mask_0 = const()[name = string("k_cache_29_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_29_squeeze_mask_0 = const()[name = string("k_cache_29_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_29_cast_fp16 = slice_by_index(begin = k_cache_29_begin_0, end = k_cache_29_end_0, end_mask = k_cache_29_end_mask_0, squeeze_mask = k_cache_29_squeeze_mask_0, x = coreml_update_state_76)[name = string("k_cache_29_cast_fp16")]; tensor v_cache_29_begin_0 = const()[name = string("v_cache_29_begin_0"), val = tensor([7, 0, 0, 0])]; tensor v_cache_29_end_0 = const()[name = string("v_cache_29_end_0"), val = tensor([8, 1, 448, 1280])]; tensor v_cache_29_end_mask_0 = const()[name = string("v_cache_29_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_29_squeeze_mask_0 = const()[name = string("v_cache_29_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_29_cast_fp16 = slice_by_index(begin = v_cache_29_begin_0, end = v_cache_29_end_0, end_mask = v_cache_29_end_mask_0, squeeze_mask = v_cache_29_squeeze_mask_0, x = coreml_update_state_77)[name = string("v_cache_29_cast_fp16")]; tensor k_cache_31_begin_0 = const()[name = string("k_cache_31_begin_0"), val = tensor([7, 0, 0, 0])]; tensor k_cache_31_end_0 = const()[name = string("k_cache_31_end_0"), val = tensor([8, 1, 1500, 1280])]; tensor k_cache_31_end_mask_0 = const()[name = string("k_cache_31_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_31_squeeze_mask_0 = const()[name = string("k_cache_31_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_31_cast_fp16 = slice_by_index(begin = k_cache_31_begin_0, end = k_cache_31_end_0, end_mask = k_cache_31_end_mask_0, squeeze_mask = k_cache_31_squeeze_mask_0, x = read_state_2)[name = string("k_cache_31_cast_fp16")]; tensor v_cache_31_begin_0 = const()[name = string("v_cache_31_begin_0"), val = tensor([7, 0, 0, 0])]; tensor v_cache_31_end_0 = const()[name = string("v_cache_31_end_0"), val = tensor([8, 1, 1500, 1280])]; tensor v_cache_31_end_mask_0 = const()[name = string("v_cache_31_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_31_squeeze_mask_0 = const()[name = string("v_cache_31_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_31_cast_fp16 = slice_by_index(begin = v_cache_31_begin_0, end = v_cache_31_end_0, end_mask = v_cache_31_end_mask_0, squeeze_mask = v_cache_31_squeeze_mask_0, x = read_state_3)[name = string("v_cache_31_cast_fp16")]; int32 var_1657 = const()[name = string("op_1657"), val = int32(-1)]; tensor var_1675_axes_0 = const()[name = string("op_1675_axes_0"), val = tensor([-1])]; tensor blocks_7_attn_ln_weight_to_fp16 = const()[name = string("blocks_7_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(459590720)))]; tensor blocks_7_attn_ln_bias_to_fp16 = const()[name = string("blocks_7_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(459593344)))]; fp16 var_1663_to_fp16 = const()[name = string("op_1663_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_1675_cast_fp16 = layer_norm(axes = var_1675_axes_0, beta = blocks_7_attn_ln_bias_to_fp16, epsilon = var_1663_to_fp16, gamma = blocks_7_attn_ln_weight_to_fp16, x = x_129_cast_fp16)[name = string("op_1675_cast_fp16")]; tensor var_1686_to_fp16 = const()[name = string("op_1686_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(459595968)))]; tensor var_1687_to_fp16 = const()[name = string("op_1687_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462872832)))]; tensor linear_56_cast_fp16 = linear(bias = var_1687_to_fp16, weight = var_1686_to_fp16, x = var_1675_cast_fp16)[name = string("linear_56_cast_fp16")]; tensor var_1690_to_fp16 = const()[name = string("op_1690_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462875456)))]; tensor linear_57_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1690_to_fp16, x = var_1675_cast_fp16)[name = string("linear_57_cast_fp16")]; tensor var_1694_to_fp16 = const()[name = string("op_1694_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(466152320)))]; tensor var_1695_to_fp16 = const()[name = string("op_1695_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(469429184)))]; tensor linear_58_cast_fp16 = linear(bias = var_1695_to_fp16, weight = var_1694_to_fp16, x = var_1675_cast_fp16)[name = string("linear_58_cast_fp16")]; tensor var_1697_shape_cast_fp16 = shape(x = linear_56_cast_fp16)[name = string("op_1697_shape_cast_fp16")]; int32 gather_86_axis_0 = const()[name = string("gather_86_axis_0"), val = int32(0)]; int32 gather_86_batch_dims_0 = const()[name = string("gather_86_batch_dims_0"), val = int32(0)]; bool gather_86_validate_indices_0 = const()[name = string("gather_86_validate_indices_0"), val = bool(false)]; string var_1697_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1697_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_86_to_uint16 = const()[name = string("select_86_to_uint16"), val = uint16(1)]; tensor var_1697_shape_cast_fp16_to_uint16 = cast(dtype = var_1697_shape_cast_fp16_to_uint16_dtype_0, x = var_1697_shape_cast_fp16)[name = string("cast_376")]; uint16 gather_86_cast_uint16 = gather(axis = gather_86_axis_0, batch_dims = gather_86_batch_dims_0, indices = select_86_to_uint16, validate_indices = gather_86_validate_indices_0, x = var_1697_shape_cast_fp16_to_uint16)[name = string("gather_86_cast_uint16")]; string gather_86_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_86_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_86_cast_uint16_to_int32 = cast(dtype = gather_86_cast_uint16_to_int32_dtype_0, x = gather_86_cast_uint16)[name = string("cast_375")]; int32 end_step_17 = add(x = offset, y = gather_86_cast_uint16_to_int32)[name = string("end_step_17")]; tensor expand_dims_112 = const()[name = string("expand_dims_112"), val = tensor([0])]; tensor expand_dims_114 = const()[name = string("expand_dims_114"), val = tensor([0])]; tensor expand_dims_115_axes_0 = const()[name = string("expand_dims_115_axes_0"), val = tensor([0])]; tensor expand_dims_115 = expand_dims(axes = expand_dims_115_axes_0, x = end_step_17)[name = string("expand_dims_115")]; tensor concat_158_values0_0 = const()[name = string("concat_158_values0_0"), val = tensor([7])]; int32 concat_158_axis_0 = const()[name = string("concat_158_axis_0"), val = int32(0)]; bool concat_158_interleave_0 = const()[name = string("concat_158_interleave_0"), val = bool(false)]; tensor concat_158 = concat(axis = concat_158_axis_0, interleave = concat_158_interleave_0, values = (concat_158_values0_0, expand_dims_112, expand_dims_1, expand_dims_114))[name = string("concat_158")]; tensor concat_159_values0_0 = const()[name = string("concat_159_values0_0"), val = tensor([0])]; tensor concat_159_values1_0 = const()[name = string("concat_159_values1_0"), val = tensor([0])]; tensor concat_159_values3_0 = const()[name = string("concat_159_values3_0"), val = tensor([0])]; int32 concat_159_axis_0 = const()[name = string("concat_159_axis_0"), val = int32(0)]; bool concat_159_interleave_0 = const()[name = string("concat_159_interleave_0"), val = bool(false)]; tensor concat_159 = concat(axis = concat_159_axis_0, interleave = concat_159_interleave_0, values = (concat_159_values0_0, concat_159_values1_0, expand_dims_115, concat_159_values3_0))[name = string("concat_159")]; tensor k_cache1_internal_tensor_assign_8_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_8_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_8_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_158, begin_mask = k_cache1_internal_tensor_assign_8_begin_mask_0, end = concat_159, end_mask = k_cache1_internal_tensor_assign_8_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_8_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_8_stride_0, update = linear_57_cast_fp16, x = coreml_update_state_76)[name = string("k_cache1_internal_tensor_assign_8_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_8_cast_fp16, input = k_cache1)[name = string("coreml_update_state_78_write_state")]; tensor coreml_update_state_78 = read_state(input = k_cache1)[name = string("coreml_update_state_78")]; tensor v_cache1_internal_tensor_assign_8_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_8_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_8_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_158, begin_mask = v_cache1_internal_tensor_assign_8_begin_mask_0, end = concat_159, end_mask = v_cache1_internal_tensor_assign_8_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_8_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_8_stride_0, update = linear_58_cast_fp16, x = coreml_update_state_77)[name = string("v_cache1_internal_tensor_assign_8_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_8_cast_fp16, input = v_cache1)[name = string("coreml_update_state_79_write_state")]; tensor coreml_update_state_79 = read_state(input = v_cache1)[name = string("coreml_update_state_79")]; int32 concat_164_values0_0 = const()[name = string("concat_164_values0_0"), val = int32(1)]; int32 concat_164_values2_0 = const()[name = string("concat_164_values2_0"), val = int32(1280)]; int32 concat_164_axis_0 = const()[name = string("concat_164_axis_0"), val = int32(0)]; bool concat_164_interleave_0 = const()[name = string("concat_164_interleave_0"), val = bool(false)]; tensor concat_164 = concat(axis = concat_164_axis_0, interleave = concat_164_interleave_0, values = (concat_164_values0_0, end_step_17, concat_164_values2_0))[name = string("concat_164")]; tensor var_1713_begin_0 = const()[name = string("op_1713_begin_0"), val = tensor([0, 0, 0])]; tensor var_1713_end_mask_0 = const()[name = string("op_1713_end_mask_0"), val = tensor([true, false, true])]; tensor var_1713_cast_fp16 = slice_by_index(begin = var_1713_begin_0, end = concat_164, end_mask = var_1713_end_mask_0, x = k_cache_29_cast_fp16)[name = string("op_1713_cast_fp16")]; tensor var_1716_begin_0 = const()[name = string("op_1716_begin_0"), val = tensor([0, 0, 0])]; tensor var_1716_end_mask_0 = const()[name = string("op_1716_end_mask_0"), val = tensor([true, false, true])]; tensor var_1716_cast_fp16 = slice_by_index(begin = var_1716_begin_0, end = concat_164, end_mask = var_1716_end_mask_0, x = v_cache_29_cast_fp16)[name = string("op_1716_cast_fp16")]; tensor concat_166x = const()[name = string("concat_166x"), val = tensor([1, -1, 20, 64])]; tensor var_1726_cast_fp16 = reshape(shape = concat_166x, x = linear_56_cast_fp16)[name = string("op_1726_cast_fp16")]; tensor const_188_to_fp16 = const()[name = string("const_188_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_59_cast_fp16 = mul(x = var_1726_cast_fp16, y = const_188_to_fp16)[name = string("q_59_cast_fp16")]; tensor concat_167x = const()[name = string("concat_167x"), val = tensor([1, -1, 20, 64])]; tensor var_1733_cast_fp16 = reshape(shape = concat_167x, x = var_1713_cast_fp16)[name = string("op_1733_cast_fp16")]; tensor const_189_to_fp16 = const()[name = string("const_189_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_75_cast_fp16 = mul(x = var_1733_cast_fp16, y = const_189_to_fp16)[name = string("k_75_cast_fp16")]; tensor concat_168x = const()[name = string("concat_168x"), val = tensor([1, -1, 20, 64])]; tensor var_1740_cast_fp16 = reshape(shape = concat_168x, x = var_1716_cast_fp16)[name = string("op_1740_cast_fp16")]; tensor var_1741 = const()[name = string("op_1741"), val = tensor([0, 2, 1, 3])]; bool qk_43_transpose_x_0 = const()[name = string("qk_43_transpose_x_0"), val = bool(false)]; bool qk_43_transpose_y_0 = const()[name = string("qk_43_transpose_y_0"), val = bool(false)]; tensor transpose_285_perm_0 = const()[name = string("transpose_285_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_286_perm_0 = const()[name = string("transpose_286_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_286 = transpose(perm = transpose_286_perm_0, x = k_75_cast_fp16)[name = string("transpose_582")]; tensor transpose_285 = transpose(perm = transpose_285_perm_0, x = q_59_cast_fp16)[name = string("transpose_583")]; tensor qk_43_cast_fp16 = matmul(transpose_x = qk_43_transpose_x_0, transpose_y = qk_43_transpose_y_0, x = transpose_285, y = transpose_286)[name = string("qk_43_cast_fp16")]; int32 concat_169_values1_0 = const()[name = string("concat_169_values1_0"), val = int32(448)]; int32 concat_169_axis_0 = const()[name = string("concat_169_axis_0"), val = int32(0)]; bool concat_169_interleave_0 = const()[name = string("concat_169_interleave_0"), val = bool(false)]; tensor concat_169 = concat(axis = concat_169_axis_0, interleave = concat_169_interleave_0, values = (gather_86_cast_uint16_to_int32, concat_169_values1_0))[name = string("concat_169")]; tensor var_1744_begin_0 = const()[name = string("op_1744_begin_0"), val = tensor([0, 0])]; tensor var_1744_end_mask_0 = const()[name = string("op_1744_end_mask_0"), val = tensor([false, true])]; tensor var_1744_cast_fp16 = slice_by_index(begin = var_1744_begin_0, end = concat_169, end_mask = var_1744_end_mask_0, x = mask_to_fp16)[name = string("op_1744_cast_fp16")]; int32 concat_170_values0_0 = const()[name = string("concat_170_values0_0"), val = int32(0)]; int32 concat_170_axis_0 = const()[name = string("concat_170_axis_0"), val = int32(0)]; bool concat_170_interleave_0 = const()[name = string("concat_170_interleave_0"), val = bool(false)]; tensor concat_170 = concat(axis = concat_170_axis_0, interleave = concat_170_interleave_0, values = (concat_170_values0_0, gather_86_cast_uint16_to_int32))[name = string("concat_170")]; tensor var_1745_begin_0 = const()[name = string("op_1745_begin_0"), val = tensor([0, 0])]; tensor var_1745_end_mask_0 = const()[name = string("op_1745_end_mask_0"), val = tensor([true, false])]; tensor var_1745_cast_fp16 = slice_by_index(begin = var_1745_begin_0, end = concat_170, end_mask = var_1745_end_mask_0, x = var_1744_cast_fp16)[name = string("op_1745_cast_fp16")]; tensor qk_45_cast_fp16 = add(x = qk_43_cast_fp16, y = var_1745_cast_fp16)[name = string("qk_45_cast_fp16")]; tensor var_1748_cast_fp16 = softmax(axis = var_1657, x = qk_45_cast_fp16)[name = string("op_1748_cast_fp16")]; bool var_1750_transpose_x_0 = const()[name = string("op_1750_transpose_x_0"), val = bool(false)]; bool var_1750_transpose_y_0 = const()[name = string("op_1750_transpose_y_0"), val = bool(false)]; tensor v_75_cast_fp16 = transpose(perm = var_1741, x = var_1740_cast_fp16)[name = string("transpose_584")]; tensor var_1750_cast_fp16 = matmul(transpose_x = var_1750_transpose_x_0, transpose_y = var_1750_transpose_y_0, x = var_1748_cast_fp16, y = v_75_cast_fp16)[name = string("op_1750_cast_fp16")]; tensor var_1751 = const()[name = string("op_1751"), val = tensor([0, 2, 1, 3])]; tensor concat_171x = const()[name = string("concat_171x"), val = tensor([1, -1, 1280])]; tensor var_1752_cast_fp16 = transpose(perm = var_1751, x = var_1750_cast_fp16)[name = string("transpose_581")]; tensor x_133_cast_fp16 = reshape(shape = concat_171x, x = var_1752_cast_fp16)[name = string("x_133_cast_fp16")]; tensor var_1756_to_fp16 = const()[name = string("op_1756_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(469431808)))]; tensor var_1757_to_fp16 = const()[name = string("op_1757_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(472708672)))]; tensor linear_59_cast_fp16 = linear(bias = var_1757_to_fp16, weight = var_1756_to_fp16, x = x_133_cast_fp16)[name = string("linear_59_cast_fp16")]; tensor x_135_cast_fp16 = add(x = x_129_cast_fp16, y = linear_59_cast_fp16)[name = string("x_135_cast_fp16")]; tensor var_1764_axes_0 = const()[name = string("op_1764_axes_0"), val = tensor([-1])]; tensor blocks_7_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_7_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(472711296)))]; tensor blocks_7_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_7_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(472713920)))]; tensor var_1764_cast_fp16 = layer_norm(axes = var_1764_axes_0, beta = blocks_7_cross_attn_ln_bias_to_fp16, epsilon = var_1663_to_fp16, gamma = blocks_7_cross_attn_ln_weight_to_fp16, x = x_135_cast_fp16)[name = string("op_1764_cast_fp16")]; tensor var_1773_to_fp16 = const()[name = string("op_1773_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(472716544)))]; tensor var_1774_to_fp16 = const()[name = string("op_1774_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(475993408)))]; tensor linear_60_cast_fp16 = linear(bias = var_1774_to_fp16, weight = var_1773_to_fp16, x = var_1764_cast_fp16)[name = string("linear_60_cast_fp16")]; tensor concat_172 = const()[name = string("concat_172"), val = tensor([0, 0, 0])]; tensor concat_173 = const()[name = string("concat_173"), val = tensor([0, 1500, 0])]; tensor k_77_internal_tensor_assign_1_stride_0 = const()[name = string("k_77_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_77_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_77_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_77_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_77_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_77_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_77_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_77_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_172, begin_mask = k_77_internal_tensor_assign_1_begin_mask_0, end = concat_173, end_mask = k_77_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_77_internal_tensor_assign_1_squeeze_mask_0, stride = k_77_internal_tensor_assign_1_stride_0, update = k_cache_31_cast_fp16, x = k_7_to_fp16)[name = string("k_77_internal_tensor_assign_1_cast_fp16")]; tensor concat_174 = const()[name = string("concat_174"), val = tensor([0, 0, 0])]; tensor concat_175 = const()[name = string("concat_175"), val = tensor([0, 1500, 0])]; tensor v_77_internal_tensor_assign_1_stride_0 = const()[name = string("v_77_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_77_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_77_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_77_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_77_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_77_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_77_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_77_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_174, begin_mask = v_77_internal_tensor_assign_1_begin_mask_0, end = concat_175, end_mask = v_77_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_77_internal_tensor_assign_1_squeeze_mask_0, stride = v_77_internal_tensor_assign_1_stride_0, update = v_cache_31_cast_fp16, x = k_7_to_fp16)[name = string("v_77_internal_tensor_assign_1_cast_fp16")]; tensor concat_176x = const()[name = string("concat_176x"), val = tensor([1, -1, 20, 64])]; tensor var_1794_cast_fp16 = reshape(shape = concat_176x, x = linear_60_cast_fp16)[name = string("op_1794_cast_fp16")]; tensor const_190_to_fp16 = const()[name = string("const_190_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_63_cast_fp16 = mul(x = var_1794_cast_fp16, y = const_190_to_fp16)[name = string("q_63_cast_fp16")]; tensor var_1800 = const()[name = string("op_1800"), val = tensor([1, 1500, 20, -1])]; tensor var_1801_cast_fp16 = reshape(shape = var_1800, x = k_77_internal_tensor_assign_1_cast_fp16)[name = string("op_1801_cast_fp16")]; tensor const_191_to_fp16 = const()[name = string("const_191_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_79_cast_fp16 = mul(x = var_1801_cast_fp16, y = const_191_to_fp16)[name = string("k_79_cast_fp16")]; tensor var_1807 = const()[name = string("op_1807"), val = tensor([1, 1500, 20, -1])]; tensor var_1808_cast_fp16 = reshape(shape = var_1807, x = v_77_internal_tensor_assign_1_cast_fp16)[name = string("op_1808_cast_fp16")]; tensor var_1809 = const()[name = string("op_1809"), val = tensor([0, 2, 1, 3])]; bool qk_47_transpose_x_0 = const()[name = string("qk_47_transpose_x_0"), val = bool(false)]; bool qk_47_transpose_y_0 = const()[name = string("qk_47_transpose_y_0"), val = bool(false)]; tensor transpose_287_perm_0 = const()[name = string("transpose_287_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_288_perm_0 = const()[name = string("transpose_288_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_288 = transpose(perm = transpose_288_perm_0, x = k_79_cast_fp16)[name = string("transpose_578")]; tensor transpose_287 = transpose(perm = transpose_287_perm_0, x = q_63_cast_fp16)[name = string("transpose_579")]; tensor qk_47_cast_fp16 = matmul(transpose_x = qk_47_transpose_x_0, transpose_y = qk_47_transpose_y_0, x = transpose_287, y = transpose_288)[name = string("qk_47_cast_fp16")]; tensor var_1813_cast_fp16 = softmax(axis = var_1657, x = qk_47_cast_fp16)[name = string("op_1813_cast_fp16")]; bool var_1815_transpose_x_0 = const()[name = string("op_1815_transpose_x_0"), val = bool(false)]; bool var_1815_transpose_y_0 = const()[name = string("op_1815_transpose_y_0"), val = bool(false)]; tensor v_79_cast_fp16 = transpose(perm = var_1809, x = var_1808_cast_fp16)[name = string("transpose_580")]; tensor var_1815_cast_fp16 = matmul(transpose_x = var_1815_transpose_x_0, transpose_y = var_1815_transpose_y_0, x = var_1813_cast_fp16, y = v_79_cast_fp16)[name = string("op_1815_cast_fp16")]; tensor var_1816 = const()[name = string("op_1816"), val = tensor([0, 2, 1, 3])]; tensor concat_177x = const()[name = string("concat_177x"), val = tensor([1, -1, 1280])]; tensor var_1817_cast_fp16 = transpose(perm = var_1816, x = var_1815_cast_fp16)[name = string("transpose_577")]; tensor x_139_cast_fp16 = reshape(shape = concat_177x, x = var_1817_cast_fp16)[name = string("x_139_cast_fp16")]; tensor var_1821_to_fp16 = const()[name = string("op_1821_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(475996032)))]; tensor var_1822_to_fp16 = const()[name = string("op_1822_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(479272896)))]; tensor linear_61_cast_fp16 = linear(bias = var_1822_to_fp16, weight = var_1821_to_fp16, x = x_139_cast_fp16)[name = string("linear_61_cast_fp16")]; tensor x_141_cast_fp16 = add(x = x_135_cast_fp16, y = linear_61_cast_fp16)[name = string("x_141_cast_fp16")]; tensor var_1829_axes_0 = const()[name = string("op_1829_axes_0"), val = tensor([-1])]; tensor blocks_7_mlp_ln_weight_to_fp16 = const()[name = string("blocks_7_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(479275520)))]; tensor blocks_7_mlp_ln_bias_to_fp16 = const()[name = string("blocks_7_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(479278144)))]; tensor var_1829_cast_fp16 = layer_norm(axes = var_1829_axes_0, beta = blocks_7_mlp_ln_bias_to_fp16, epsilon = var_1663_to_fp16, gamma = blocks_7_mlp_ln_weight_to_fp16, x = x_141_cast_fp16)[name = string("op_1829_cast_fp16")]; tensor var_1838_to_fp16 = const()[name = string("op_1838_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(479280768)))]; tensor var_1839_to_fp16 = const()[name = string("op_1839_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(492388032)))]; tensor linear_62_cast_fp16 = linear(bias = var_1839_to_fp16, weight = var_1838_to_fp16, x = var_1829_cast_fp16)[name = string("linear_62_cast_fp16")]; string x_145_mode_0 = const()[name = string("x_145_mode_0"), val = string("EXACT")]; tensor x_145_cast_fp16 = gelu(mode = x_145_mode_0, x = linear_62_cast_fp16)[name = string("x_145_cast_fp16")]; tensor var_1844_to_fp16 = const()[name = string("op_1844_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(492398336)))]; tensor var_1845_to_fp16 = const()[name = string("op_1845_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505505600)))]; tensor linear_63_cast_fp16 = linear(bias = var_1845_to_fp16, weight = var_1844_to_fp16, x = x_145_cast_fp16)[name = string("linear_63_cast_fp16")]; tensor x_147_cast_fp16 = add(x = x_141_cast_fp16, y = linear_63_cast_fp16)[name = string("x_147_cast_fp16")]; tensor k_cache_33_begin_0 = const()[name = string("k_cache_33_begin_0"), val = tensor([8, 0, 0, 0])]; tensor k_cache_33_end_0 = const()[name = string("k_cache_33_end_0"), val = tensor([9, 1, 448, 1280])]; tensor k_cache_33_end_mask_0 = const()[name = string("k_cache_33_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_33_squeeze_mask_0 = const()[name = string("k_cache_33_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_33_cast_fp16 = slice_by_index(begin = k_cache_33_begin_0, end = k_cache_33_end_0, end_mask = k_cache_33_end_mask_0, squeeze_mask = k_cache_33_squeeze_mask_0, x = coreml_update_state_78)[name = string("k_cache_33_cast_fp16")]; tensor v_cache_33_begin_0 = const()[name = string("v_cache_33_begin_0"), val = tensor([8, 0, 0, 0])]; tensor v_cache_33_end_0 = const()[name = string("v_cache_33_end_0"), val = tensor([9, 1, 448, 1280])]; tensor v_cache_33_end_mask_0 = const()[name = string("v_cache_33_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_33_squeeze_mask_0 = const()[name = string("v_cache_33_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_33_cast_fp16 = slice_by_index(begin = v_cache_33_begin_0, end = v_cache_33_end_0, end_mask = v_cache_33_end_mask_0, squeeze_mask = v_cache_33_squeeze_mask_0, x = coreml_update_state_79)[name = string("v_cache_33_cast_fp16")]; tensor k_cache_35_begin_0 = const()[name = string("k_cache_35_begin_0"), val = tensor([8, 0, 0, 0])]; tensor k_cache_35_end_0 = const()[name = string("k_cache_35_end_0"), val = tensor([9, 1, 1500, 1280])]; tensor k_cache_35_end_mask_0 = const()[name = string("k_cache_35_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_35_squeeze_mask_0 = const()[name = string("k_cache_35_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_35_cast_fp16 = slice_by_index(begin = k_cache_35_begin_0, end = k_cache_35_end_0, end_mask = k_cache_35_end_mask_0, squeeze_mask = k_cache_35_squeeze_mask_0, x = read_state_2)[name = string("k_cache_35_cast_fp16")]; tensor v_cache_35_begin_0 = const()[name = string("v_cache_35_begin_0"), val = tensor([8, 0, 0, 0])]; tensor v_cache_35_end_0 = const()[name = string("v_cache_35_end_0"), val = tensor([9, 1, 1500, 1280])]; tensor v_cache_35_end_mask_0 = const()[name = string("v_cache_35_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_35_squeeze_mask_0 = const()[name = string("v_cache_35_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_35_cast_fp16 = slice_by_index(begin = v_cache_35_begin_0, end = v_cache_35_end_0, end_mask = v_cache_35_end_mask_0, squeeze_mask = v_cache_35_squeeze_mask_0, x = read_state_3)[name = string("v_cache_35_cast_fp16")]; int32 var_1868 = const()[name = string("op_1868"), val = int32(-1)]; tensor var_1886_axes_0 = const()[name = string("op_1886_axes_0"), val = tensor([-1])]; tensor blocks_8_attn_ln_weight_to_fp16 = const()[name = string("blocks_8_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505508224)))]; tensor blocks_8_attn_ln_bias_to_fp16 = const()[name = string("blocks_8_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505510848)))]; fp16 var_1874_to_fp16 = const()[name = string("op_1874_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_1886_cast_fp16 = layer_norm(axes = var_1886_axes_0, beta = blocks_8_attn_ln_bias_to_fp16, epsilon = var_1874_to_fp16, gamma = blocks_8_attn_ln_weight_to_fp16, x = x_147_cast_fp16)[name = string("op_1886_cast_fp16")]; tensor var_1897_to_fp16 = const()[name = string("op_1897_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505513472)))]; tensor var_1898_to_fp16 = const()[name = string("op_1898_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(508790336)))]; tensor linear_64_cast_fp16 = linear(bias = var_1898_to_fp16, weight = var_1897_to_fp16, x = var_1886_cast_fp16)[name = string("linear_64_cast_fp16")]; tensor var_1901_to_fp16 = const()[name = string("op_1901_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(508792960)))]; tensor linear_65_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1901_to_fp16, x = var_1886_cast_fp16)[name = string("linear_65_cast_fp16")]; tensor var_1905_to_fp16 = const()[name = string("op_1905_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(512069824)))]; tensor var_1906_to_fp16 = const()[name = string("op_1906_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(515346688)))]; tensor linear_66_cast_fp16 = linear(bias = var_1906_to_fp16, weight = var_1905_to_fp16, x = var_1886_cast_fp16)[name = string("linear_66_cast_fp16")]; tensor var_1908_shape_cast_fp16 = shape(x = linear_64_cast_fp16)[name = string("op_1908_shape_cast_fp16")]; int32 gather_98_axis_0 = const()[name = string("gather_98_axis_0"), val = int32(0)]; int32 gather_98_batch_dims_0 = const()[name = string("gather_98_batch_dims_0"), val = int32(0)]; bool gather_98_validate_indices_0 = const()[name = string("gather_98_validate_indices_0"), val = bool(false)]; string var_1908_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1908_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_98_to_uint16 = const()[name = string("select_98_to_uint16"), val = uint16(1)]; tensor var_1908_shape_cast_fp16_to_uint16 = cast(dtype = var_1908_shape_cast_fp16_to_uint16_dtype_0, x = var_1908_shape_cast_fp16)[name = string("cast_374")]; uint16 gather_98_cast_uint16 = gather(axis = gather_98_axis_0, batch_dims = gather_98_batch_dims_0, indices = select_98_to_uint16, validate_indices = gather_98_validate_indices_0, x = var_1908_shape_cast_fp16_to_uint16)[name = string("gather_98_cast_uint16")]; string gather_98_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_98_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_98_cast_uint16_to_int32 = cast(dtype = gather_98_cast_uint16_to_int32_dtype_0, x = gather_98_cast_uint16)[name = string("cast_373")]; int32 end_step_19 = add(x = offset, y = gather_98_cast_uint16_to_int32)[name = string("end_step_19")]; tensor expand_dims_128 = const()[name = string("expand_dims_128"), val = tensor([0])]; tensor expand_dims_130 = const()[name = string("expand_dims_130"), val = tensor([0])]; tensor expand_dims_131_axes_0 = const()[name = string("expand_dims_131_axes_0"), val = tensor([0])]; tensor expand_dims_131 = expand_dims(axes = expand_dims_131_axes_0, x = end_step_19)[name = string("expand_dims_131")]; tensor concat_180_values0_0 = const()[name = string("concat_180_values0_0"), val = tensor([8])]; int32 concat_180_axis_0 = const()[name = string("concat_180_axis_0"), val = int32(0)]; bool concat_180_interleave_0 = const()[name = string("concat_180_interleave_0"), val = bool(false)]; tensor concat_180 = concat(axis = concat_180_axis_0, interleave = concat_180_interleave_0, values = (concat_180_values0_0, expand_dims_128, expand_dims_1, expand_dims_130))[name = string("concat_180")]; tensor concat_181_values0_0 = const()[name = string("concat_181_values0_0"), val = tensor([0])]; tensor concat_181_values1_0 = const()[name = string("concat_181_values1_0"), val = tensor([0])]; tensor concat_181_values3_0 = const()[name = string("concat_181_values3_0"), val = tensor([0])]; int32 concat_181_axis_0 = const()[name = string("concat_181_axis_0"), val = int32(0)]; bool concat_181_interleave_0 = const()[name = string("concat_181_interleave_0"), val = bool(false)]; tensor concat_181 = concat(axis = concat_181_axis_0, interleave = concat_181_interleave_0, values = (concat_181_values0_0, concat_181_values1_0, expand_dims_131, concat_181_values3_0))[name = string("concat_181")]; tensor k_cache1_internal_tensor_assign_9_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_9_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_9_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_180, begin_mask = k_cache1_internal_tensor_assign_9_begin_mask_0, end = concat_181, end_mask = k_cache1_internal_tensor_assign_9_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_9_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_9_stride_0, update = linear_65_cast_fp16, x = coreml_update_state_78)[name = string("k_cache1_internal_tensor_assign_9_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_9_cast_fp16, input = k_cache1)[name = string("coreml_update_state_80_write_state")]; tensor coreml_update_state_80 = read_state(input = k_cache1)[name = string("coreml_update_state_80")]; tensor v_cache1_internal_tensor_assign_9_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_9_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_9_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_180, begin_mask = v_cache1_internal_tensor_assign_9_begin_mask_0, end = concat_181, end_mask = v_cache1_internal_tensor_assign_9_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_9_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_9_stride_0, update = linear_66_cast_fp16, x = coreml_update_state_79)[name = string("v_cache1_internal_tensor_assign_9_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_9_cast_fp16, input = v_cache1)[name = string("coreml_update_state_81_write_state")]; tensor coreml_update_state_81 = read_state(input = v_cache1)[name = string("coreml_update_state_81")]; int32 concat_186_values0_0 = const()[name = string("concat_186_values0_0"), val = int32(1)]; int32 concat_186_values2_0 = const()[name = string("concat_186_values2_0"), val = int32(1280)]; int32 concat_186_axis_0 = const()[name = string("concat_186_axis_0"), val = int32(0)]; bool concat_186_interleave_0 = const()[name = string("concat_186_interleave_0"), val = bool(false)]; tensor concat_186 = concat(axis = concat_186_axis_0, interleave = concat_186_interleave_0, values = (concat_186_values0_0, end_step_19, concat_186_values2_0))[name = string("concat_186")]; tensor var_1924_begin_0 = const()[name = string("op_1924_begin_0"), val = tensor([0, 0, 0])]; tensor var_1924_end_mask_0 = const()[name = string("op_1924_end_mask_0"), val = tensor([true, false, true])]; tensor var_1924_cast_fp16 = slice_by_index(begin = var_1924_begin_0, end = concat_186, end_mask = var_1924_end_mask_0, x = k_cache_33_cast_fp16)[name = string("op_1924_cast_fp16")]; tensor var_1927_begin_0 = const()[name = string("op_1927_begin_0"), val = tensor([0, 0, 0])]; tensor var_1927_end_mask_0 = const()[name = string("op_1927_end_mask_0"), val = tensor([true, false, true])]; tensor var_1927_cast_fp16 = slice_by_index(begin = var_1927_begin_0, end = concat_186, end_mask = var_1927_end_mask_0, x = v_cache_33_cast_fp16)[name = string("op_1927_cast_fp16")]; tensor concat_188x = const()[name = string("concat_188x"), val = tensor([1, -1, 20, 64])]; tensor var_1937_cast_fp16 = reshape(shape = concat_188x, x = linear_64_cast_fp16)[name = string("op_1937_cast_fp16")]; tensor const_192_to_fp16 = const()[name = string("const_192_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_67_cast_fp16 = mul(x = var_1937_cast_fp16, y = const_192_to_fp16)[name = string("q_67_cast_fp16")]; tensor concat_189x = const()[name = string("concat_189x"), val = tensor([1, -1, 20, 64])]; tensor var_1944_cast_fp16 = reshape(shape = concat_189x, x = var_1924_cast_fp16)[name = string("op_1944_cast_fp16")]; tensor const_193_to_fp16 = const()[name = string("const_193_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_85_cast_fp16 = mul(x = var_1944_cast_fp16, y = const_193_to_fp16)[name = string("k_85_cast_fp16")]; tensor concat_190x = const()[name = string("concat_190x"), val = tensor([1, -1, 20, 64])]; tensor var_1951_cast_fp16 = reshape(shape = concat_190x, x = var_1927_cast_fp16)[name = string("op_1951_cast_fp16")]; tensor var_1952 = const()[name = string("op_1952"), val = tensor([0, 2, 1, 3])]; bool qk_49_transpose_x_0 = const()[name = string("qk_49_transpose_x_0"), val = bool(false)]; bool qk_49_transpose_y_0 = const()[name = string("qk_49_transpose_y_0"), val = bool(false)]; tensor transpose_289_perm_0 = const()[name = string("transpose_289_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_290_perm_0 = const()[name = string("transpose_290_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_290 = transpose(perm = transpose_290_perm_0, x = k_85_cast_fp16)[name = string("transpose_574")]; tensor transpose_289 = transpose(perm = transpose_289_perm_0, x = q_67_cast_fp16)[name = string("transpose_575")]; tensor qk_49_cast_fp16 = matmul(transpose_x = qk_49_transpose_x_0, transpose_y = qk_49_transpose_y_0, x = transpose_289, y = transpose_290)[name = string("qk_49_cast_fp16")]; int32 concat_191_values1_0 = const()[name = string("concat_191_values1_0"), val = int32(448)]; int32 concat_191_axis_0 = const()[name = string("concat_191_axis_0"), val = int32(0)]; bool concat_191_interleave_0 = const()[name = string("concat_191_interleave_0"), val = bool(false)]; tensor concat_191 = concat(axis = concat_191_axis_0, interleave = concat_191_interleave_0, values = (gather_98_cast_uint16_to_int32, concat_191_values1_0))[name = string("concat_191")]; tensor var_1955_begin_0 = const()[name = string("op_1955_begin_0"), val = tensor([0, 0])]; tensor var_1955_end_mask_0 = const()[name = string("op_1955_end_mask_0"), val = tensor([false, true])]; tensor var_1955_cast_fp16 = slice_by_index(begin = var_1955_begin_0, end = concat_191, end_mask = var_1955_end_mask_0, x = mask_to_fp16)[name = string("op_1955_cast_fp16")]; int32 concat_192_values0_0 = const()[name = string("concat_192_values0_0"), val = int32(0)]; int32 concat_192_axis_0 = const()[name = string("concat_192_axis_0"), val = int32(0)]; bool concat_192_interleave_0 = const()[name = string("concat_192_interleave_0"), val = bool(false)]; tensor concat_192 = concat(axis = concat_192_axis_0, interleave = concat_192_interleave_0, values = (concat_192_values0_0, gather_98_cast_uint16_to_int32))[name = string("concat_192")]; tensor var_1956_begin_0 = const()[name = string("op_1956_begin_0"), val = tensor([0, 0])]; tensor var_1956_end_mask_0 = const()[name = string("op_1956_end_mask_0"), val = tensor([true, false])]; tensor var_1956_cast_fp16 = slice_by_index(begin = var_1956_begin_0, end = concat_192, end_mask = var_1956_end_mask_0, x = var_1955_cast_fp16)[name = string("op_1956_cast_fp16")]; tensor qk_51_cast_fp16 = add(x = qk_49_cast_fp16, y = var_1956_cast_fp16)[name = string("qk_51_cast_fp16")]; tensor var_1959_cast_fp16 = softmax(axis = var_1868, x = qk_51_cast_fp16)[name = string("op_1959_cast_fp16")]; bool var_1961_transpose_x_0 = const()[name = string("op_1961_transpose_x_0"), val = bool(false)]; bool var_1961_transpose_y_0 = const()[name = string("op_1961_transpose_y_0"), val = bool(false)]; tensor v_85_cast_fp16 = transpose(perm = var_1952, x = var_1951_cast_fp16)[name = string("transpose_576")]; tensor var_1961_cast_fp16 = matmul(transpose_x = var_1961_transpose_x_0, transpose_y = var_1961_transpose_y_0, x = var_1959_cast_fp16, y = v_85_cast_fp16)[name = string("op_1961_cast_fp16")]; tensor var_1962 = const()[name = string("op_1962"), val = tensor([0, 2, 1, 3])]; tensor concat_193x = const()[name = string("concat_193x"), val = tensor([1, -1, 1280])]; tensor var_1963_cast_fp16 = transpose(perm = var_1962, x = var_1961_cast_fp16)[name = string("transpose_573")]; tensor x_151_cast_fp16 = reshape(shape = concat_193x, x = var_1963_cast_fp16)[name = string("x_151_cast_fp16")]; tensor var_1967_to_fp16 = const()[name = string("op_1967_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(515349312)))]; tensor var_1968_to_fp16 = const()[name = string("op_1968_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(518626176)))]; tensor linear_67_cast_fp16 = linear(bias = var_1968_to_fp16, weight = var_1967_to_fp16, x = x_151_cast_fp16)[name = string("linear_67_cast_fp16")]; tensor x_153_cast_fp16 = add(x = x_147_cast_fp16, y = linear_67_cast_fp16)[name = string("x_153_cast_fp16")]; tensor var_1975_axes_0 = const()[name = string("op_1975_axes_0"), val = tensor([-1])]; tensor blocks_8_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_8_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(518628800)))]; tensor blocks_8_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_8_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(518631424)))]; tensor var_1975_cast_fp16 = layer_norm(axes = var_1975_axes_0, beta = blocks_8_cross_attn_ln_bias_to_fp16, epsilon = var_1874_to_fp16, gamma = blocks_8_cross_attn_ln_weight_to_fp16, x = x_153_cast_fp16)[name = string("op_1975_cast_fp16")]; tensor var_1984_to_fp16 = const()[name = string("op_1984_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(518634048)))]; tensor var_1985_to_fp16 = const()[name = string("op_1985_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(521910912)))]; tensor linear_68_cast_fp16 = linear(bias = var_1985_to_fp16, weight = var_1984_to_fp16, x = var_1975_cast_fp16)[name = string("linear_68_cast_fp16")]; tensor concat_194 = const()[name = string("concat_194"), val = tensor([0, 0, 0])]; tensor concat_195 = const()[name = string("concat_195"), val = tensor([0, 1500, 0])]; tensor k_87_internal_tensor_assign_1_stride_0 = const()[name = string("k_87_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_87_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_87_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_87_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_87_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_87_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_87_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_87_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_194, begin_mask = k_87_internal_tensor_assign_1_begin_mask_0, end = concat_195, end_mask = k_87_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_87_internal_tensor_assign_1_squeeze_mask_0, stride = k_87_internal_tensor_assign_1_stride_0, update = k_cache_35_cast_fp16, x = k_7_to_fp16)[name = string("k_87_internal_tensor_assign_1_cast_fp16")]; tensor concat_196 = const()[name = string("concat_196"), val = tensor([0, 0, 0])]; tensor concat_197 = const()[name = string("concat_197"), val = tensor([0, 1500, 0])]; tensor v_87_internal_tensor_assign_1_stride_0 = const()[name = string("v_87_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_87_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_87_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_87_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_87_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_87_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_87_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_87_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_196, begin_mask = v_87_internal_tensor_assign_1_begin_mask_0, end = concat_197, end_mask = v_87_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_87_internal_tensor_assign_1_squeeze_mask_0, stride = v_87_internal_tensor_assign_1_stride_0, update = v_cache_35_cast_fp16, x = k_7_to_fp16)[name = string("v_87_internal_tensor_assign_1_cast_fp16")]; tensor concat_198x = const()[name = string("concat_198x"), val = tensor([1, -1, 20, 64])]; tensor var_2005_cast_fp16 = reshape(shape = concat_198x, x = linear_68_cast_fp16)[name = string("op_2005_cast_fp16")]; tensor const_194_to_fp16 = const()[name = string("const_194_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_71_cast_fp16 = mul(x = var_2005_cast_fp16, y = const_194_to_fp16)[name = string("q_71_cast_fp16")]; tensor var_2011 = const()[name = string("op_2011"), val = tensor([1, 1500, 20, -1])]; tensor var_2012_cast_fp16 = reshape(shape = var_2011, x = k_87_internal_tensor_assign_1_cast_fp16)[name = string("op_2012_cast_fp16")]; tensor const_195_to_fp16 = const()[name = string("const_195_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_89_cast_fp16 = mul(x = var_2012_cast_fp16, y = const_195_to_fp16)[name = string("k_89_cast_fp16")]; tensor var_2018 = const()[name = string("op_2018"), val = tensor([1, 1500, 20, -1])]; tensor var_2019_cast_fp16 = reshape(shape = var_2018, x = v_87_internal_tensor_assign_1_cast_fp16)[name = string("op_2019_cast_fp16")]; tensor var_2020 = const()[name = string("op_2020"), val = tensor([0, 2, 1, 3])]; bool qk_53_transpose_x_0 = const()[name = string("qk_53_transpose_x_0"), val = bool(false)]; bool qk_53_transpose_y_0 = const()[name = string("qk_53_transpose_y_0"), val = bool(false)]; tensor transpose_291_perm_0 = const()[name = string("transpose_291_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_292_perm_0 = const()[name = string("transpose_292_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_292 = transpose(perm = transpose_292_perm_0, x = k_89_cast_fp16)[name = string("transpose_570")]; tensor transpose_291 = transpose(perm = transpose_291_perm_0, x = q_71_cast_fp16)[name = string("transpose_571")]; tensor qk_53_cast_fp16 = matmul(transpose_x = qk_53_transpose_x_0, transpose_y = qk_53_transpose_y_0, x = transpose_291, y = transpose_292)[name = string("qk_53_cast_fp16")]; tensor var_2024_cast_fp16 = softmax(axis = var_1868, x = qk_53_cast_fp16)[name = string("op_2024_cast_fp16")]; bool var_2026_transpose_x_0 = const()[name = string("op_2026_transpose_x_0"), val = bool(false)]; bool var_2026_transpose_y_0 = const()[name = string("op_2026_transpose_y_0"), val = bool(false)]; tensor v_89_cast_fp16 = transpose(perm = var_2020, x = var_2019_cast_fp16)[name = string("transpose_572")]; tensor var_2026_cast_fp16 = matmul(transpose_x = var_2026_transpose_x_0, transpose_y = var_2026_transpose_y_0, x = var_2024_cast_fp16, y = v_89_cast_fp16)[name = string("op_2026_cast_fp16")]; tensor var_2027 = const()[name = string("op_2027"), val = tensor([0, 2, 1, 3])]; tensor concat_199x = const()[name = string("concat_199x"), val = tensor([1, -1, 1280])]; tensor var_2028_cast_fp16 = transpose(perm = var_2027, x = var_2026_cast_fp16)[name = string("transpose_569")]; tensor x_157_cast_fp16 = reshape(shape = concat_199x, x = var_2028_cast_fp16)[name = string("x_157_cast_fp16")]; tensor var_2032_to_fp16 = const()[name = string("op_2032_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(521913536)))]; tensor var_2033_to_fp16 = const()[name = string("op_2033_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(525190400)))]; tensor linear_69_cast_fp16 = linear(bias = var_2033_to_fp16, weight = var_2032_to_fp16, x = x_157_cast_fp16)[name = string("linear_69_cast_fp16")]; tensor x_159_cast_fp16 = add(x = x_153_cast_fp16, y = linear_69_cast_fp16)[name = string("x_159_cast_fp16")]; tensor var_2040_axes_0 = const()[name = string("op_2040_axes_0"), val = tensor([-1])]; tensor blocks_8_mlp_ln_weight_to_fp16 = const()[name = string("blocks_8_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(525193024)))]; tensor blocks_8_mlp_ln_bias_to_fp16 = const()[name = string("blocks_8_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(525195648)))]; tensor var_2040_cast_fp16 = layer_norm(axes = var_2040_axes_0, beta = blocks_8_mlp_ln_bias_to_fp16, epsilon = var_1874_to_fp16, gamma = blocks_8_mlp_ln_weight_to_fp16, x = x_159_cast_fp16)[name = string("op_2040_cast_fp16")]; tensor var_2049_to_fp16 = const()[name = string("op_2049_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(525198272)))]; tensor var_2050_to_fp16 = const()[name = string("op_2050_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(538305536)))]; tensor linear_70_cast_fp16 = linear(bias = var_2050_to_fp16, weight = var_2049_to_fp16, x = var_2040_cast_fp16)[name = string("linear_70_cast_fp16")]; string x_163_mode_0 = const()[name = string("x_163_mode_0"), val = string("EXACT")]; tensor x_163_cast_fp16 = gelu(mode = x_163_mode_0, x = linear_70_cast_fp16)[name = string("x_163_cast_fp16")]; tensor var_2055_to_fp16 = const()[name = string("op_2055_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(538315840)))]; tensor var_2056_to_fp16 = const()[name = string("op_2056_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551423104)))]; tensor linear_71_cast_fp16 = linear(bias = var_2056_to_fp16, weight = var_2055_to_fp16, x = x_163_cast_fp16)[name = string("linear_71_cast_fp16")]; tensor x_165_cast_fp16 = add(x = x_159_cast_fp16, y = linear_71_cast_fp16)[name = string("x_165_cast_fp16")]; tensor k_cache_37_begin_0 = const()[name = string("k_cache_37_begin_0"), val = tensor([9, 0, 0, 0])]; tensor k_cache_37_end_0 = const()[name = string("k_cache_37_end_0"), val = tensor([10, 1, 448, 1280])]; tensor k_cache_37_end_mask_0 = const()[name = string("k_cache_37_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_37_squeeze_mask_0 = const()[name = string("k_cache_37_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_37_cast_fp16 = slice_by_index(begin = k_cache_37_begin_0, end = k_cache_37_end_0, end_mask = k_cache_37_end_mask_0, squeeze_mask = k_cache_37_squeeze_mask_0, x = coreml_update_state_80)[name = string("k_cache_37_cast_fp16")]; tensor v_cache_37_begin_0 = const()[name = string("v_cache_37_begin_0"), val = tensor([9, 0, 0, 0])]; tensor v_cache_37_end_0 = const()[name = string("v_cache_37_end_0"), val = tensor([10, 1, 448, 1280])]; tensor v_cache_37_end_mask_0 = const()[name = string("v_cache_37_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_37_squeeze_mask_0 = const()[name = string("v_cache_37_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_37_cast_fp16 = slice_by_index(begin = v_cache_37_begin_0, end = v_cache_37_end_0, end_mask = v_cache_37_end_mask_0, squeeze_mask = v_cache_37_squeeze_mask_0, x = coreml_update_state_81)[name = string("v_cache_37_cast_fp16")]; tensor k_cache_39_begin_0 = const()[name = string("k_cache_39_begin_0"), val = tensor([9, 0, 0, 0])]; tensor k_cache_39_end_0 = const()[name = string("k_cache_39_end_0"), val = tensor([10, 1, 1500, 1280])]; tensor k_cache_39_end_mask_0 = const()[name = string("k_cache_39_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_39_squeeze_mask_0 = const()[name = string("k_cache_39_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_39_cast_fp16 = slice_by_index(begin = k_cache_39_begin_0, end = k_cache_39_end_0, end_mask = k_cache_39_end_mask_0, squeeze_mask = k_cache_39_squeeze_mask_0, x = read_state_2)[name = string("k_cache_39_cast_fp16")]; tensor v_cache_39_begin_0 = const()[name = string("v_cache_39_begin_0"), val = tensor([9, 0, 0, 0])]; tensor v_cache_39_end_0 = const()[name = string("v_cache_39_end_0"), val = tensor([10, 1, 1500, 1280])]; tensor v_cache_39_end_mask_0 = const()[name = string("v_cache_39_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_39_squeeze_mask_0 = const()[name = string("v_cache_39_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_39_cast_fp16 = slice_by_index(begin = v_cache_39_begin_0, end = v_cache_39_end_0, end_mask = v_cache_39_end_mask_0, squeeze_mask = v_cache_39_squeeze_mask_0, x = read_state_3)[name = string("v_cache_39_cast_fp16")]; int32 var_2079 = const()[name = string("op_2079"), val = int32(-1)]; tensor var_2097_axes_0 = const()[name = string("op_2097_axes_0"), val = tensor([-1])]; tensor blocks_9_attn_ln_weight_to_fp16 = const()[name = string("blocks_9_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551425728)))]; tensor blocks_9_attn_ln_bias_to_fp16 = const()[name = string("blocks_9_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551428352)))]; fp16 var_2085_to_fp16 = const()[name = string("op_2085_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_2097_cast_fp16 = layer_norm(axes = var_2097_axes_0, beta = blocks_9_attn_ln_bias_to_fp16, epsilon = var_2085_to_fp16, gamma = blocks_9_attn_ln_weight_to_fp16, x = x_165_cast_fp16)[name = string("op_2097_cast_fp16")]; tensor var_2108_to_fp16 = const()[name = string("op_2108_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551430976)))]; tensor var_2109_to_fp16 = const()[name = string("op_2109_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(554707840)))]; tensor linear_72_cast_fp16 = linear(bias = var_2109_to_fp16, weight = var_2108_to_fp16, x = var_2097_cast_fp16)[name = string("linear_72_cast_fp16")]; tensor var_2112_to_fp16 = const()[name = string("op_2112_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(554710464)))]; tensor linear_73_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2112_to_fp16, x = var_2097_cast_fp16)[name = string("linear_73_cast_fp16")]; tensor var_2116_to_fp16 = const()[name = string("op_2116_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(557987328)))]; tensor var_2117_to_fp16 = const()[name = string("op_2117_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(561264192)))]; tensor linear_74_cast_fp16 = linear(bias = var_2117_to_fp16, weight = var_2116_to_fp16, x = var_2097_cast_fp16)[name = string("linear_74_cast_fp16")]; tensor var_2119_shape_cast_fp16 = shape(x = linear_72_cast_fp16)[name = string("op_2119_shape_cast_fp16")]; int32 gather_110_axis_0 = const()[name = string("gather_110_axis_0"), val = int32(0)]; int32 gather_110_batch_dims_0 = const()[name = string("gather_110_batch_dims_0"), val = int32(0)]; bool gather_110_validate_indices_0 = const()[name = string("gather_110_validate_indices_0"), val = bool(false)]; string var_2119_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2119_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_110_to_uint16 = const()[name = string("select_110_to_uint16"), val = uint16(1)]; tensor var_2119_shape_cast_fp16_to_uint16 = cast(dtype = var_2119_shape_cast_fp16_to_uint16_dtype_0, x = var_2119_shape_cast_fp16)[name = string("cast_372")]; uint16 gather_110_cast_uint16 = gather(axis = gather_110_axis_0, batch_dims = gather_110_batch_dims_0, indices = select_110_to_uint16, validate_indices = gather_110_validate_indices_0, x = var_2119_shape_cast_fp16_to_uint16)[name = string("gather_110_cast_uint16")]; string gather_110_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_110_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_110_cast_uint16_to_int32 = cast(dtype = gather_110_cast_uint16_to_int32_dtype_0, x = gather_110_cast_uint16)[name = string("cast_371")]; int32 end_step_21 = add(x = offset, y = gather_110_cast_uint16_to_int32)[name = string("end_step_21")]; tensor expand_dims_144 = const()[name = string("expand_dims_144"), val = tensor([0])]; tensor expand_dims_146 = const()[name = string("expand_dims_146"), val = tensor([0])]; tensor expand_dims_147_axes_0 = const()[name = string("expand_dims_147_axes_0"), val = tensor([0])]; tensor expand_dims_147 = expand_dims(axes = expand_dims_147_axes_0, x = end_step_21)[name = string("expand_dims_147")]; tensor concat_202_values0_0 = const()[name = string("concat_202_values0_0"), val = tensor([9])]; int32 concat_202_axis_0 = const()[name = string("concat_202_axis_0"), val = int32(0)]; bool concat_202_interleave_0 = const()[name = string("concat_202_interleave_0"), val = bool(false)]; tensor concat_202 = concat(axis = concat_202_axis_0, interleave = concat_202_interleave_0, values = (concat_202_values0_0, expand_dims_144, expand_dims_1, expand_dims_146))[name = string("concat_202")]; tensor concat_203_values0_0 = const()[name = string("concat_203_values0_0"), val = tensor([0])]; tensor concat_203_values1_0 = const()[name = string("concat_203_values1_0"), val = tensor([0])]; tensor concat_203_values3_0 = const()[name = string("concat_203_values3_0"), val = tensor([0])]; int32 concat_203_axis_0 = const()[name = string("concat_203_axis_0"), val = int32(0)]; bool concat_203_interleave_0 = const()[name = string("concat_203_interleave_0"), val = bool(false)]; tensor concat_203 = concat(axis = concat_203_axis_0, interleave = concat_203_interleave_0, values = (concat_203_values0_0, concat_203_values1_0, expand_dims_147, concat_203_values3_0))[name = string("concat_203")]; tensor k_cache1_internal_tensor_assign_10_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_10_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_10_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_202, begin_mask = k_cache1_internal_tensor_assign_10_begin_mask_0, end = concat_203, end_mask = k_cache1_internal_tensor_assign_10_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_10_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_10_stride_0, update = linear_73_cast_fp16, x = coreml_update_state_80)[name = string("k_cache1_internal_tensor_assign_10_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_10_cast_fp16, input = k_cache1)[name = string("coreml_update_state_82_write_state")]; tensor coreml_update_state_82 = read_state(input = k_cache1)[name = string("coreml_update_state_82")]; tensor v_cache1_internal_tensor_assign_10_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_10_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_10_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_202, begin_mask = v_cache1_internal_tensor_assign_10_begin_mask_0, end = concat_203, end_mask = v_cache1_internal_tensor_assign_10_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_10_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_10_stride_0, update = linear_74_cast_fp16, x = coreml_update_state_81)[name = string("v_cache1_internal_tensor_assign_10_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_10_cast_fp16, input = v_cache1)[name = string("coreml_update_state_83_write_state")]; tensor coreml_update_state_83 = read_state(input = v_cache1)[name = string("coreml_update_state_83")]; int32 concat_208_values0_0 = const()[name = string("concat_208_values0_0"), val = int32(1)]; int32 concat_208_values2_0 = const()[name = string("concat_208_values2_0"), val = int32(1280)]; int32 concat_208_axis_0 = const()[name = string("concat_208_axis_0"), val = int32(0)]; bool concat_208_interleave_0 = const()[name = string("concat_208_interleave_0"), val = bool(false)]; tensor concat_208 = concat(axis = concat_208_axis_0, interleave = concat_208_interleave_0, values = (concat_208_values0_0, end_step_21, concat_208_values2_0))[name = string("concat_208")]; tensor var_2135_begin_0 = const()[name = string("op_2135_begin_0"), val = tensor([0, 0, 0])]; tensor var_2135_end_mask_0 = const()[name = string("op_2135_end_mask_0"), val = tensor([true, false, true])]; tensor var_2135_cast_fp16 = slice_by_index(begin = var_2135_begin_0, end = concat_208, end_mask = var_2135_end_mask_0, x = k_cache_37_cast_fp16)[name = string("op_2135_cast_fp16")]; tensor var_2138_begin_0 = const()[name = string("op_2138_begin_0"), val = tensor([0, 0, 0])]; tensor var_2138_end_mask_0 = const()[name = string("op_2138_end_mask_0"), val = tensor([true, false, true])]; tensor var_2138_cast_fp16 = slice_by_index(begin = var_2138_begin_0, end = concat_208, end_mask = var_2138_end_mask_0, x = v_cache_37_cast_fp16)[name = string("op_2138_cast_fp16")]; tensor concat_210x = const()[name = string("concat_210x"), val = tensor([1, -1, 20, 64])]; tensor var_2148_cast_fp16 = reshape(shape = concat_210x, x = linear_72_cast_fp16)[name = string("op_2148_cast_fp16")]; tensor const_196_to_fp16 = const()[name = string("const_196_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_75_cast_fp16 = mul(x = var_2148_cast_fp16, y = const_196_to_fp16)[name = string("q_75_cast_fp16")]; tensor concat_211x = const()[name = string("concat_211x"), val = tensor([1, -1, 20, 64])]; tensor var_2155_cast_fp16 = reshape(shape = concat_211x, x = var_2135_cast_fp16)[name = string("op_2155_cast_fp16")]; tensor const_197_to_fp16 = const()[name = string("const_197_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_95_cast_fp16 = mul(x = var_2155_cast_fp16, y = const_197_to_fp16)[name = string("k_95_cast_fp16")]; tensor concat_212x = const()[name = string("concat_212x"), val = tensor([1, -1, 20, 64])]; tensor var_2162_cast_fp16 = reshape(shape = concat_212x, x = var_2138_cast_fp16)[name = string("op_2162_cast_fp16")]; tensor var_2163 = const()[name = string("op_2163"), val = tensor([0, 2, 1, 3])]; bool qk_55_transpose_x_0 = const()[name = string("qk_55_transpose_x_0"), val = bool(false)]; bool qk_55_transpose_y_0 = const()[name = string("qk_55_transpose_y_0"), val = bool(false)]; tensor transpose_293_perm_0 = const()[name = string("transpose_293_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_294_perm_0 = const()[name = string("transpose_294_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_294 = transpose(perm = transpose_294_perm_0, x = k_95_cast_fp16)[name = string("transpose_566")]; tensor transpose_293 = transpose(perm = transpose_293_perm_0, x = q_75_cast_fp16)[name = string("transpose_567")]; tensor qk_55_cast_fp16 = matmul(transpose_x = qk_55_transpose_x_0, transpose_y = qk_55_transpose_y_0, x = transpose_293, y = transpose_294)[name = string("qk_55_cast_fp16")]; int32 concat_213_values1_0 = const()[name = string("concat_213_values1_0"), val = int32(448)]; int32 concat_213_axis_0 = const()[name = string("concat_213_axis_0"), val = int32(0)]; bool concat_213_interleave_0 = const()[name = string("concat_213_interleave_0"), val = bool(false)]; tensor concat_213 = concat(axis = concat_213_axis_0, interleave = concat_213_interleave_0, values = (gather_110_cast_uint16_to_int32, concat_213_values1_0))[name = string("concat_213")]; tensor var_2166_begin_0 = const()[name = string("op_2166_begin_0"), val = tensor([0, 0])]; tensor var_2166_end_mask_0 = const()[name = string("op_2166_end_mask_0"), val = tensor([false, true])]; tensor var_2166_cast_fp16 = slice_by_index(begin = var_2166_begin_0, end = concat_213, end_mask = var_2166_end_mask_0, x = mask_to_fp16)[name = string("op_2166_cast_fp16")]; int32 concat_214_values0_0 = const()[name = string("concat_214_values0_0"), val = int32(0)]; int32 concat_214_axis_0 = const()[name = string("concat_214_axis_0"), val = int32(0)]; bool concat_214_interleave_0 = const()[name = string("concat_214_interleave_0"), val = bool(false)]; tensor concat_214 = concat(axis = concat_214_axis_0, interleave = concat_214_interleave_0, values = (concat_214_values0_0, gather_110_cast_uint16_to_int32))[name = string("concat_214")]; tensor var_2167_begin_0 = const()[name = string("op_2167_begin_0"), val = tensor([0, 0])]; tensor var_2167_end_mask_0 = const()[name = string("op_2167_end_mask_0"), val = tensor([true, false])]; tensor var_2167_cast_fp16 = slice_by_index(begin = var_2167_begin_0, end = concat_214, end_mask = var_2167_end_mask_0, x = var_2166_cast_fp16)[name = string("op_2167_cast_fp16")]; tensor qk_57_cast_fp16 = add(x = qk_55_cast_fp16, y = var_2167_cast_fp16)[name = string("qk_57_cast_fp16")]; tensor var_2170_cast_fp16 = softmax(axis = var_2079, x = qk_57_cast_fp16)[name = string("op_2170_cast_fp16")]; bool var_2172_transpose_x_0 = const()[name = string("op_2172_transpose_x_0"), val = bool(false)]; bool var_2172_transpose_y_0 = const()[name = string("op_2172_transpose_y_0"), val = bool(false)]; tensor v_95_cast_fp16 = transpose(perm = var_2163, x = var_2162_cast_fp16)[name = string("transpose_568")]; tensor var_2172_cast_fp16 = matmul(transpose_x = var_2172_transpose_x_0, transpose_y = var_2172_transpose_y_0, x = var_2170_cast_fp16, y = v_95_cast_fp16)[name = string("op_2172_cast_fp16")]; tensor var_2173 = const()[name = string("op_2173"), val = tensor([0, 2, 1, 3])]; tensor concat_215x = const()[name = string("concat_215x"), val = tensor([1, -1, 1280])]; tensor var_2174_cast_fp16 = transpose(perm = var_2173, x = var_2172_cast_fp16)[name = string("transpose_565")]; tensor x_169_cast_fp16 = reshape(shape = concat_215x, x = var_2174_cast_fp16)[name = string("x_169_cast_fp16")]; tensor var_2178_to_fp16 = const()[name = string("op_2178_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(561266816)))]; tensor var_2179_to_fp16 = const()[name = string("op_2179_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564543680)))]; tensor linear_75_cast_fp16 = linear(bias = var_2179_to_fp16, weight = var_2178_to_fp16, x = x_169_cast_fp16)[name = string("linear_75_cast_fp16")]; tensor x_171_cast_fp16 = add(x = x_165_cast_fp16, y = linear_75_cast_fp16)[name = string("x_171_cast_fp16")]; tensor var_2186_axes_0 = const()[name = string("op_2186_axes_0"), val = tensor([-1])]; tensor blocks_9_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_9_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564546304)))]; tensor blocks_9_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_9_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564548928)))]; tensor var_2186_cast_fp16 = layer_norm(axes = var_2186_axes_0, beta = blocks_9_cross_attn_ln_bias_to_fp16, epsilon = var_2085_to_fp16, gamma = blocks_9_cross_attn_ln_weight_to_fp16, x = x_171_cast_fp16)[name = string("op_2186_cast_fp16")]; tensor var_2195_to_fp16 = const()[name = string("op_2195_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564551552)))]; tensor var_2196_to_fp16 = const()[name = string("op_2196_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(567828416)))]; tensor linear_76_cast_fp16 = linear(bias = var_2196_to_fp16, weight = var_2195_to_fp16, x = var_2186_cast_fp16)[name = string("linear_76_cast_fp16")]; tensor concat_216 = const()[name = string("concat_216"), val = tensor([0, 0, 0])]; tensor concat_217 = const()[name = string("concat_217"), val = tensor([0, 1500, 0])]; tensor k_97_internal_tensor_assign_1_stride_0 = const()[name = string("k_97_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_97_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_97_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_97_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_97_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_97_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_97_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_97_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_216, begin_mask = k_97_internal_tensor_assign_1_begin_mask_0, end = concat_217, end_mask = k_97_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_97_internal_tensor_assign_1_squeeze_mask_0, stride = k_97_internal_tensor_assign_1_stride_0, update = k_cache_39_cast_fp16, x = k_7_to_fp16)[name = string("k_97_internal_tensor_assign_1_cast_fp16")]; tensor concat_218 = const()[name = string("concat_218"), val = tensor([0, 0, 0])]; tensor concat_219 = const()[name = string("concat_219"), val = tensor([0, 1500, 0])]; tensor v_97_internal_tensor_assign_1_stride_0 = const()[name = string("v_97_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_97_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_97_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_97_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_97_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_97_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_97_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_97_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_218, begin_mask = v_97_internal_tensor_assign_1_begin_mask_0, end = concat_219, end_mask = v_97_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_97_internal_tensor_assign_1_squeeze_mask_0, stride = v_97_internal_tensor_assign_1_stride_0, update = v_cache_39_cast_fp16, x = k_7_to_fp16)[name = string("v_97_internal_tensor_assign_1_cast_fp16")]; tensor concat_220x = const()[name = string("concat_220x"), val = tensor([1, -1, 20, 64])]; tensor var_2216_cast_fp16 = reshape(shape = concat_220x, x = linear_76_cast_fp16)[name = string("op_2216_cast_fp16")]; tensor const_198_to_fp16 = const()[name = string("const_198_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_79_cast_fp16 = mul(x = var_2216_cast_fp16, y = const_198_to_fp16)[name = string("q_79_cast_fp16")]; tensor var_2222 = const()[name = string("op_2222"), val = tensor([1, 1500, 20, -1])]; tensor var_2223_cast_fp16 = reshape(shape = var_2222, x = k_97_internal_tensor_assign_1_cast_fp16)[name = string("op_2223_cast_fp16")]; tensor const_199_to_fp16 = const()[name = string("const_199_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_99_cast_fp16 = mul(x = var_2223_cast_fp16, y = const_199_to_fp16)[name = string("k_99_cast_fp16")]; tensor var_2229 = const()[name = string("op_2229"), val = tensor([1, 1500, 20, -1])]; tensor var_2230_cast_fp16 = reshape(shape = var_2229, x = v_97_internal_tensor_assign_1_cast_fp16)[name = string("op_2230_cast_fp16")]; tensor var_2231 = const()[name = string("op_2231"), val = tensor([0, 2, 1, 3])]; bool qk_59_transpose_x_0 = const()[name = string("qk_59_transpose_x_0"), val = bool(false)]; bool qk_59_transpose_y_0 = const()[name = string("qk_59_transpose_y_0"), val = bool(false)]; tensor transpose_295_perm_0 = const()[name = string("transpose_295_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_296_perm_0 = const()[name = string("transpose_296_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_296 = transpose(perm = transpose_296_perm_0, x = k_99_cast_fp16)[name = string("transpose_562")]; tensor transpose_295 = transpose(perm = transpose_295_perm_0, x = q_79_cast_fp16)[name = string("transpose_563")]; tensor qk_59_cast_fp16 = matmul(transpose_x = qk_59_transpose_x_0, transpose_y = qk_59_transpose_y_0, x = transpose_295, y = transpose_296)[name = string("qk_59_cast_fp16")]; tensor var_2235_cast_fp16 = softmax(axis = var_2079, x = qk_59_cast_fp16)[name = string("op_2235_cast_fp16")]; bool var_2237_transpose_x_0 = const()[name = string("op_2237_transpose_x_0"), val = bool(false)]; bool var_2237_transpose_y_0 = const()[name = string("op_2237_transpose_y_0"), val = bool(false)]; tensor v_99_cast_fp16 = transpose(perm = var_2231, x = var_2230_cast_fp16)[name = string("transpose_564")]; tensor var_2237_cast_fp16 = matmul(transpose_x = var_2237_transpose_x_0, transpose_y = var_2237_transpose_y_0, x = var_2235_cast_fp16, y = v_99_cast_fp16)[name = string("op_2237_cast_fp16")]; tensor var_2238 = const()[name = string("op_2238"), val = tensor([0, 2, 1, 3])]; tensor concat_221x = const()[name = string("concat_221x"), val = tensor([1, -1, 1280])]; tensor var_2239_cast_fp16 = transpose(perm = var_2238, x = var_2237_cast_fp16)[name = string("transpose_561")]; tensor x_175_cast_fp16 = reshape(shape = concat_221x, x = var_2239_cast_fp16)[name = string("x_175_cast_fp16")]; tensor var_2243_to_fp16 = const()[name = string("op_2243_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(567831040)))]; tensor var_2244_to_fp16 = const()[name = string("op_2244_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(571107904)))]; tensor linear_77_cast_fp16 = linear(bias = var_2244_to_fp16, weight = var_2243_to_fp16, x = x_175_cast_fp16)[name = string("linear_77_cast_fp16")]; tensor x_177_cast_fp16 = add(x = x_171_cast_fp16, y = linear_77_cast_fp16)[name = string("x_177_cast_fp16")]; tensor var_2251_axes_0 = const()[name = string("op_2251_axes_0"), val = tensor([-1])]; tensor blocks_9_mlp_ln_weight_to_fp16 = const()[name = string("blocks_9_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(571110528)))]; tensor blocks_9_mlp_ln_bias_to_fp16 = const()[name = string("blocks_9_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(571113152)))]; tensor var_2251_cast_fp16 = layer_norm(axes = var_2251_axes_0, beta = blocks_9_mlp_ln_bias_to_fp16, epsilon = var_2085_to_fp16, gamma = blocks_9_mlp_ln_weight_to_fp16, x = x_177_cast_fp16)[name = string("op_2251_cast_fp16")]; tensor var_2260_to_fp16 = const()[name = string("op_2260_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(571115776)))]; tensor var_2261_to_fp16 = const()[name = string("op_2261_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(584223040)))]; tensor linear_78_cast_fp16 = linear(bias = var_2261_to_fp16, weight = var_2260_to_fp16, x = var_2251_cast_fp16)[name = string("linear_78_cast_fp16")]; string x_181_mode_0 = const()[name = string("x_181_mode_0"), val = string("EXACT")]; tensor x_181_cast_fp16 = gelu(mode = x_181_mode_0, x = linear_78_cast_fp16)[name = string("x_181_cast_fp16")]; tensor var_2266_to_fp16 = const()[name = string("op_2266_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(584233344)))]; tensor var_2267_to_fp16 = const()[name = string("op_2267_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(597340608)))]; tensor linear_79_cast_fp16 = linear(bias = var_2267_to_fp16, weight = var_2266_to_fp16, x = x_181_cast_fp16)[name = string("linear_79_cast_fp16")]; tensor x_183_cast_fp16 = add(x = x_177_cast_fp16, y = linear_79_cast_fp16)[name = string("x_183_cast_fp16")]; tensor k_cache_41_begin_0 = const()[name = string("k_cache_41_begin_0"), val = tensor([10, 0, 0, 0])]; tensor k_cache_41_end_0 = const()[name = string("k_cache_41_end_0"), val = tensor([11, 1, 448, 1280])]; tensor k_cache_41_end_mask_0 = const()[name = string("k_cache_41_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_41_squeeze_mask_0 = const()[name = string("k_cache_41_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_41_cast_fp16 = slice_by_index(begin = k_cache_41_begin_0, end = k_cache_41_end_0, end_mask = k_cache_41_end_mask_0, squeeze_mask = k_cache_41_squeeze_mask_0, x = coreml_update_state_82)[name = string("k_cache_41_cast_fp16")]; tensor v_cache_41_begin_0 = const()[name = string("v_cache_41_begin_0"), val = tensor([10, 0, 0, 0])]; tensor v_cache_41_end_0 = const()[name = string("v_cache_41_end_0"), val = tensor([11, 1, 448, 1280])]; tensor v_cache_41_end_mask_0 = const()[name = string("v_cache_41_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_41_squeeze_mask_0 = const()[name = string("v_cache_41_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_41_cast_fp16 = slice_by_index(begin = v_cache_41_begin_0, end = v_cache_41_end_0, end_mask = v_cache_41_end_mask_0, squeeze_mask = v_cache_41_squeeze_mask_0, x = coreml_update_state_83)[name = string("v_cache_41_cast_fp16")]; tensor k_cache_43_begin_0 = const()[name = string("k_cache_43_begin_0"), val = tensor([10, 0, 0, 0])]; tensor k_cache_43_end_0 = const()[name = string("k_cache_43_end_0"), val = tensor([11, 1, 1500, 1280])]; tensor k_cache_43_end_mask_0 = const()[name = string("k_cache_43_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_43_squeeze_mask_0 = const()[name = string("k_cache_43_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_43_cast_fp16 = slice_by_index(begin = k_cache_43_begin_0, end = k_cache_43_end_0, end_mask = k_cache_43_end_mask_0, squeeze_mask = k_cache_43_squeeze_mask_0, x = read_state_2)[name = string("k_cache_43_cast_fp16")]; tensor v_cache_43_begin_0 = const()[name = string("v_cache_43_begin_0"), val = tensor([10, 0, 0, 0])]; tensor v_cache_43_end_0 = const()[name = string("v_cache_43_end_0"), val = tensor([11, 1, 1500, 1280])]; tensor v_cache_43_end_mask_0 = const()[name = string("v_cache_43_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_43_squeeze_mask_0 = const()[name = string("v_cache_43_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_43_cast_fp16 = slice_by_index(begin = v_cache_43_begin_0, end = v_cache_43_end_0, end_mask = v_cache_43_end_mask_0, squeeze_mask = v_cache_43_squeeze_mask_0, x = read_state_3)[name = string("v_cache_43_cast_fp16")]; int32 var_2290 = const()[name = string("op_2290"), val = int32(-1)]; tensor var_2308_axes_0 = const()[name = string("op_2308_axes_0"), val = tensor([-1])]; tensor blocks_10_attn_ln_weight_to_fp16 = const()[name = string("blocks_10_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(597343232)))]; tensor blocks_10_attn_ln_bias_to_fp16 = const()[name = string("blocks_10_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(597345856)))]; fp16 var_2296_to_fp16 = const()[name = string("op_2296_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_2308_cast_fp16 = layer_norm(axes = var_2308_axes_0, beta = blocks_10_attn_ln_bias_to_fp16, epsilon = var_2296_to_fp16, gamma = blocks_10_attn_ln_weight_to_fp16, x = x_183_cast_fp16)[name = string("op_2308_cast_fp16")]; tensor var_2319_to_fp16 = const()[name = string("op_2319_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(597348480)))]; tensor var_2320_to_fp16 = const()[name = string("op_2320_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600625344)))]; tensor linear_80_cast_fp16 = linear(bias = var_2320_to_fp16, weight = var_2319_to_fp16, x = var_2308_cast_fp16)[name = string("linear_80_cast_fp16")]; tensor var_2323_to_fp16 = const()[name = string("op_2323_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600627968)))]; tensor linear_81_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2323_to_fp16, x = var_2308_cast_fp16)[name = string("linear_81_cast_fp16")]; tensor var_2327_to_fp16 = const()[name = string("op_2327_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(603904832)))]; tensor var_2328_to_fp16 = const()[name = string("op_2328_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(607181696)))]; tensor linear_82_cast_fp16 = linear(bias = var_2328_to_fp16, weight = var_2327_to_fp16, x = var_2308_cast_fp16)[name = string("linear_82_cast_fp16")]; tensor var_2330_shape_cast_fp16 = shape(x = linear_80_cast_fp16)[name = string("op_2330_shape_cast_fp16")]; int32 gather_122_axis_0 = const()[name = string("gather_122_axis_0"), val = int32(0)]; int32 gather_122_batch_dims_0 = const()[name = string("gather_122_batch_dims_0"), val = int32(0)]; bool gather_122_validate_indices_0 = const()[name = string("gather_122_validate_indices_0"), val = bool(false)]; string var_2330_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2330_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_122_to_uint16 = const()[name = string("select_122_to_uint16"), val = uint16(1)]; tensor var_2330_shape_cast_fp16_to_uint16 = cast(dtype = var_2330_shape_cast_fp16_to_uint16_dtype_0, x = var_2330_shape_cast_fp16)[name = string("cast_370")]; uint16 gather_122_cast_uint16 = gather(axis = gather_122_axis_0, batch_dims = gather_122_batch_dims_0, indices = select_122_to_uint16, validate_indices = gather_122_validate_indices_0, x = var_2330_shape_cast_fp16_to_uint16)[name = string("gather_122_cast_uint16")]; string gather_122_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_122_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_122_cast_uint16_to_int32 = cast(dtype = gather_122_cast_uint16_to_int32_dtype_0, x = gather_122_cast_uint16)[name = string("cast_369")]; int32 end_step_23 = add(x = offset, y = gather_122_cast_uint16_to_int32)[name = string("end_step_23")]; tensor expand_dims_160 = const()[name = string("expand_dims_160"), val = tensor([0])]; tensor expand_dims_162 = const()[name = string("expand_dims_162"), val = tensor([0])]; tensor expand_dims_163_axes_0 = const()[name = string("expand_dims_163_axes_0"), val = tensor([0])]; tensor expand_dims_163 = expand_dims(axes = expand_dims_163_axes_0, x = end_step_23)[name = string("expand_dims_163")]; tensor concat_224_values0_0 = const()[name = string("concat_224_values0_0"), val = tensor([10])]; int32 concat_224_axis_0 = const()[name = string("concat_224_axis_0"), val = int32(0)]; bool concat_224_interleave_0 = const()[name = string("concat_224_interleave_0"), val = bool(false)]; tensor concat_224 = concat(axis = concat_224_axis_0, interleave = concat_224_interleave_0, values = (concat_224_values0_0, expand_dims_160, expand_dims_1, expand_dims_162))[name = string("concat_224")]; tensor concat_225_values0_0 = const()[name = string("concat_225_values0_0"), val = tensor([0])]; tensor concat_225_values1_0 = const()[name = string("concat_225_values1_0"), val = tensor([0])]; tensor concat_225_values3_0 = const()[name = string("concat_225_values3_0"), val = tensor([0])]; int32 concat_225_axis_0 = const()[name = string("concat_225_axis_0"), val = int32(0)]; bool concat_225_interleave_0 = const()[name = string("concat_225_interleave_0"), val = bool(false)]; tensor concat_225 = concat(axis = concat_225_axis_0, interleave = concat_225_interleave_0, values = (concat_225_values0_0, concat_225_values1_0, expand_dims_163, concat_225_values3_0))[name = string("concat_225")]; tensor k_cache1_internal_tensor_assign_11_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_11_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_11_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_224, begin_mask = k_cache1_internal_tensor_assign_11_begin_mask_0, end = concat_225, end_mask = k_cache1_internal_tensor_assign_11_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_11_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_11_stride_0, update = linear_81_cast_fp16, x = coreml_update_state_82)[name = string("k_cache1_internal_tensor_assign_11_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_11_cast_fp16, input = k_cache1)[name = string("coreml_update_state_84_write_state")]; tensor coreml_update_state_84 = read_state(input = k_cache1)[name = string("coreml_update_state_84")]; tensor v_cache1_internal_tensor_assign_11_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_11_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_11_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_224, begin_mask = v_cache1_internal_tensor_assign_11_begin_mask_0, end = concat_225, end_mask = v_cache1_internal_tensor_assign_11_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_11_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_11_stride_0, update = linear_82_cast_fp16, x = coreml_update_state_83)[name = string("v_cache1_internal_tensor_assign_11_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_11_cast_fp16, input = v_cache1)[name = string("coreml_update_state_85_write_state")]; tensor coreml_update_state_85 = read_state(input = v_cache1)[name = string("coreml_update_state_85")]; int32 concat_230_values0_0 = const()[name = string("concat_230_values0_0"), val = int32(1)]; int32 concat_230_values2_0 = const()[name = string("concat_230_values2_0"), val = int32(1280)]; int32 concat_230_axis_0 = const()[name = string("concat_230_axis_0"), val = int32(0)]; bool concat_230_interleave_0 = const()[name = string("concat_230_interleave_0"), val = bool(false)]; tensor concat_230 = concat(axis = concat_230_axis_0, interleave = concat_230_interleave_0, values = (concat_230_values0_0, end_step_23, concat_230_values2_0))[name = string("concat_230")]; tensor var_2346_begin_0 = const()[name = string("op_2346_begin_0"), val = tensor([0, 0, 0])]; tensor var_2346_end_mask_0 = const()[name = string("op_2346_end_mask_0"), val = tensor([true, false, true])]; tensor var_2346_cast_fp16 = slice_by_index(begin = var_2346_begin_0, end = concat_230, end_mask = var_2346_end_mask_0, x = k_cache_41_cast_fp16)[name = string("op_2346_cast_fp16")]; tensor var_2349_begin_0 = const()[name = string("op_2349_begin_0"), val = tensor([0, 0, 0])]; tensor var_2349_end_mask_0 = const()[name = string("op_2349_end_mask_0"), val = tensor([true, false, true])]; tensor var_2349_cast_fp16 = slice_by_index(begin = var_2349_begin_0, end = concat_230, end_mask = var_2349_end_mask_0, x = v_cache_41_cast_fp16)[name = string("op_2349_cast_fp16")]; tensor concat_232x = const()[name = string("concat_232x"), val = tensor([1, -1, 20, 64])]; tensor var_2359_cast_fp16 = reshape(shape = concat_232x, x = linear_80_cast_fp16)[name = string("op_2359_cast_fp16")]; tensor const_200_to_fp16 = const()[name = string("const_200_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_83_cast_fp16 = mul(x = var_2359_cast_fp16, y = const_200_to_fp16)[name = string("q_83_cast_fp16")]; tensor concat_233x = const()[name = string("concat_233x"), val = tensor([1, -1, 20, 64])]; tensor var_2366_cast_fp16 = reshape(shape = concat_233x, x = var_2346_cast_fp16)[name = string("op_2366_cast_fp16")]; tensor const_201_to_fp16 = const()[name = string("const_201_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_105_cast_fp16 = mul(x = var_2366_cast_fp16, y = const_201_to_fp16)[name = string("k_105_cast_fp16")]; tensor concat_234x = const()[name = string("concat_234x"), val = tensor([1, -1, 20, 64])]; tensor var_2373_cast_fp16 = reshape(shape = concat_234x, x = var_2349_cast_fp16)[name = string("op_2373_cast_fp16")]; tensor var_2374 = const()[name = string("op_2374"), val = tensor([0, 2, 1, 3])]; bool qk_61_transpose_x_0 = const()[name = string("qk_61_transpose_x_0"), val = bool(false)]; bool qk_61_transpose_y_0 = const()[name = string("qk_61_transpose_y_0"), val = bool(false)]; tensor transpose_297_perm_0 = const()[name = string("transpose_297_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_298_perm_0 = const()[name = string("transpose_298_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_298 = transpose(perm = transpose_298_perm_0, x = k_105_cast_fp16)[name = string("transpose_558")]; tensor transpose_297 = transpose(perm = transpose_297_perm_0, x = q_83_cast_fp16)[name = string("transpose_559")]; tensor qk_61_cast_fp16 = matmul(transpose_x = qk_61_transpose_x_0, transpose_y = qk_61_transpose_y_0, x = transpose_297, y = transpose_298)[name = string("qk_61_cast_fp16")]; int32 concat_235_values1_0 = const()[name = string("concat_235_values1_0"), val = int32(448)]; int32 concat_235_axis_0 = const()[name = string("concat_235_axis_0"), val = int32(0)]; bool concat_235_interleave_0 = const()[name = string("concat_235_interleave_0"), val = bool(false)]; tensor concat_235 = concat(axis = concat_235_axis_0, interleave = concat_235_interleave_0, values = (gather_122_cast_uint16_to_int32, concat_235_values1_0))[name = string("concat_235")]; tensor var_2377_begin_0 = const()[name = string("op_2377_begin_0"), val = tensor([0, 0])]; tensor var_2377_end_mask_0 = const()[name = string("op_2377_end_mask_0"), val = tensor([false, true])]; tensor var_2377_cast_fp16 = slice_by_index(begin = var_2377_begin_0, end = concat_235, end_mask = var_2377_end_mask_0, x = mask_to_fp16)[name = string("op_2377_cast_fp16")]; int32 concat_236_values0_0 = const()[name = string("concat_236_values0_0"), val = int32(0)]; int32 concat_236_axis_0 = const()[name = string("concat_236_axis_0"), val = int32(0)]; bool concat_236_interleave_0 = const()[name = string("concat_236_interleave_0"), val = bool(false)]; tensor concat_236 = concat(axis = concat_236_axis_0, interleave = concat_236_interleave_0, values = (concat_236_values0_0, gather_122_cast_uint16_to_int32))[name = string("concat_236")]; tensor var_2378_begin_0 = const()[name = string("op_2378_begin_0"), val = tensor([0, 0])]; tensor var_2378_end_mask_0 = const()[name = string("op_2378_end_mask_0"), val = tensor([true, false])]; tensor var_2378_cast_fp16 = slice_by_index(begin = var_2378_begin_0, end = concat_236, end_mask = var_2378_end_mask_0, x = var_2377_cast_fp16)[name = string("op_2378_cast_fp16")]; tensor qk_63_cast_fp16 = add(x = qk_61_cast_fp16, y = var_2378_cast_fp16)[name = string("qk_63_cast_fp16")]; tensor var_2381_cast_fp16 = softmax(axis = var_2290, x = qk_63_cast_fp16)[name = string("op_2381_cast_fp16")]; bool var_2383_transpose_x_0 = const()[name = string("op_2383_transpose_x_0"), val = bool(false)]; bool var_2383_transpose_y_0 = const()[name = string("op_2383_transpose_y_0"), val = bool(false)]; tensor v_105_cast_fp16 = transpose(perm = var_2374, x = var_2373_cast_fp16)[name = string("transpose_560")]; tensor var_2383_cast_fp16 = matmul(transpose_x = var_2383_transpose_x_0, transpose_y = var_2383_transpose_y_0, x = var_2381_cast_fp16, y = v_105_cast_fp16)[name = string("op_2383_cast_fp16")]; tensor var_2384 = const()[name = string("op_2384"), val = tensor([0, 2, 1, 3])]; tensor concat_237x = const()[name = string("concat_237x"), val = tensor([1, -1, 1280])]; tensor var_2385_cast_fp16 = transpose(perm = var_2384, x = var_2383_cast_fp16)[name = string("transpose_557")]; tensor x_187_cast_fp16 = reshape(shape = concat_237x, x = var_2385_cast_fp16)[name = string("x_187_cast_fp16")]; tensor var_2389_to_fp16 = const()[name = string("op_2389_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(607184320)))]; tensor var_2390_to_fp16 = const()[name = string("op_2390_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(610461184)))]; tensor linear_83_cast_fp16 = linear(bias = var_2390_to_fp16, weight = var_2389_to_fp16, x = x_187_cast_fp16)[name = string("linear_83_cast_fp16")]; tensor x_189_cast_fp16 = add(x = x_183_cast_fp16, y = linear_83_cast_fp16)[name = string("x_189_cast_fp16")]; tensor var_2397_axes_0 = const()[name = string("op_2397_axes_0"), val = tensor([-1])]; tensor blocks_10_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_10_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(610463808)))]; tensor blocks_10_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_10_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(610466432)))]; tensor var_2397_cast_fp16 = layer_norm(axes = var_2397_axes_0, beta = blocks_10_cross_attn_ln_bias_to_fp16, epsilon = var_2296_to_fp16, gamma = blocks_10_cross_attn_ln_weight_to_fp16, x = x_189_cast_fp16)[name = string("op_2397_cast_fp16")]; tensor var_2406_to_fp16 = const()[name = string("op_2406_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(610469056)))]; tensor var_2407_to_fp16 = const()[name = string("op_2407_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(613745920)))]; tensor linear_84_cast_fp16 = linear(bias = var_2407_to_fp16, weight = var_2406_to_fp16, x = var_2397_cast_fp16)[name = string("linear_84_cast_fp16")]; tensor concat_238 = const()[name = string("concat_238"), val = tensor([0, 0, 0])]; tensor concat_239 = const()[name = string("concat_239"), val = tensor([0, 1500, 0])]; tensor k_107_internal_tensor_assign_1_stride_0 = const()[name = string("k_107_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_107_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_107_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_107_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_107_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_107_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_107_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_107_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_238, begin_mask = k_107_internal_tensor_assign_1_begin_mask_0, end = concat_239, end_mask = k_107_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_107_internal_tensor_assign_1_squeeze_mask_0, stride = k_107_internal_tensor_assign_1_stride_0, update = k_cache_43_cast_fp16, x = k_7_to_fp16)[name = string("k_107_internal_tensor_assign_1_cast_fp16")]; tensor concat_240 = const()[name = string("concat_240"), val = tensor([0, 0, 0])]; tensor concat_241 = const()[name = string("concat_241"), val = tensor([0, 1500, 0])]; tensor v_107_internal_tensor_assign_1_stride_0 = const()[name = string("v_107_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_107_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_107_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_107_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_107_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_107_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_107_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_107_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_240, begin_mask = v_107_internal_tensor_assign_1_begin_mask_0, end = concat_241, end_mask = v_107_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_107_internal_tensor_assign_1_squeeze_mask_0, stride = v_107_internal_tensor_assign_1_stride_0, update = v_cache_43_cast_fp16, x = k_7_to_fp16)[name = string("v_107_internal_tensor_assign_1_cast_fp16")]; tensor concat_242x = const()[name = string("concat_242x"), val = tensor([1, -1, 20, 64])]; tensor var_2427_cast_fp16 = reshape(shape = concat_242x, x = linear_84_cast_fp16)[name = string("op_2427_cast_fp16")]; tensor const_202_to_fp16 = const()[name = string("const_202_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_87_cast_fp16 = mul(x = var_2427_cast_fp16, y = const_202_to_fp16)[name = string("q_87_cast_fp16")]; tensor var_2433 = const()[name = string("op_2433"), val = tensor([1, 1500, 20, -1])]; tensor var_2434_cast_fp16 = reshape(shape = var_2433, x = k_107_internal_tensor_assign_1_cast_fp16)[name = string("op_2434_cast_fp16")]; tensor const_203_to_fp16 = const()[name = string("const_203_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_109_cast_fp16 = mul(x = var_2434_cast_fp16, y = const_203_to_fp16)[name = string("k_109_cast_fp16")]; tensor var_2440 = const()[name = string("op_2440"), val = tensor([1, 1500, 20, -1])]; tensor var_2441_cast_fp16 = reshape(shape = var_2440, x = v_107_internal_tensor_assign_1_cast_fp16)[name = string("op_2441_cast_fp16")]; tensor var_2442 = const()[name = string("op_2442"), val = tensor([0, 2, 1, 3])]; bool qk_65_transpose_x_0 = const()[name = string("qk_65_transpose_x_0"), val = bool(false)]; bool qk_65_transpose_y_0 = const()[name = string("qk_65_transpose_y_0"), val = bool(false)]; tensor transpose_299_perm_0 = const()[name = string("transpose_299_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_300_perm_0 = const()[name = string("transpose_300_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_300 = transpose(perm = transpose_300_perm_0, x = k_109_cast_fp16)[name = string("transpose_554")]; tensor transpose_299 = transpose(perm = transpose_299_perm_0, x = q_87_cast_fp16)[name = string("transpose_555")]; tensor qk_65_cast_fp16 = matmul(transpose_x = qk_65_transpose_x_0, transpose_y = qk_65_transpose_y_0, x = transpose_299, y = transpose_300)[name = string("qk_65_cast_fp16")]; tensor var_2446_cast_fp16 = softmax(axis = var_2290, x = qk_65_cast_fp16)[name = string("op_2446_cast_fp16")]; bool var_2448_transpose_x_0 = const()[name = string("op_2448_transpose_x_0"), val = bool(false)]; bool var_2448_transpose_y_0 = const()[name = string("op_2448_transpose_y_0"), val = bool(false)]; tensor v_109_cast_fp16 = transpose(perm = var_2442, x = var_2441_cast_fp16)[name = string("transpose_556")]; tensor var_2448_cast_fp16 = matmul(transpose_x = var_2448_transpose_x_0, transpose_y = var_2448_transpose_y_0, x = var_2446_cast_fp16, y = v_109_cast_fp16)[name = string("op_2448_cast_fp16")]; tensor var_2449 = const()[name = string("op_2449"), val = tensor([0, 2, 1, 3])]; tensor concat_243x = const()[name = string("concat_243x"), val = tensor([1, -1, 1280])]; tensor var_2450_cast_fp16 = transpose(perm = var_2449, x = var_2448_cast_fp16)[name = string("transpose_553")]; tensor x_193_cast_fp16 = reshape(shape = concat_243x, x = var_2450_cast_fp16)[name = string("x_193_cast_fp16")]; tensor var_2454_to_fp16 = const()[name = string("op_2454_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(613748544)))]; tensor var_2455_to_fp16 = const()[name = string("op_2455_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(617025408)))]; tensor linear_85_cast_fp16 = linear(bias = var_2455_to_fp16, weight = var_2454_to_fp16, x = x_193_cast_fp16)[name = string("linear_85_cast_fp16")]; tensor x_195_cast_fp16 = add(x = x_189_cast_fp16, y = linear_85_cast_fp16)[name = string("x_195_cast_fp16")]; tensor var_2462_axes_0 = const()[name = string("op_2462_axes_0"), val = tensor([-1])]; tensor blocks_10_mlp_ln_weight_to_fp16 = const()[name = string("blocks_10_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(617028032)))]; tensor blocks_10_mlp_ln_bias_to_fp16 = const()[name = string("blocks_10_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(617030656)))]; tensor var_2462_cast_fp16 = layer_norm(axes = var_2462_axes_0, beta = blocks_10_mlp_ln_bias_to_fp16, epsilon = var_2296_to_fp16, gamma = blocks_10_mlp_ln_weight_to_fp16, x = x_195_cast_fp16)[name = string("op_2462_cast_fp16")]; tensor var_2471_to_fp16 = const()[name = string("op_2471_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(617033280)))]; tensor var_2472_to_fp16 = const()[name = string("op_2472_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(630140544)))]; tensor linear_86_cast_fp16 = linear(bias = var_2472_to_fp16, weight = var_2471_to_fp16, x = var_2462_cast_fp16)[name = string("linear_86_cast_fp16")]; string x_199_mode_0 = const()[name = string("x_199_mode_0"), val = string("EXACT")]; tensor x_199_cast_fp16 = gelu(mode = x_199_mode_0, x = linear_86_cast_fp16)[name = string("x_199_cast_fp16")]; tensor var_2477_to_fp16 = const()[name = string("op_2477_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(630150848)))]; tensor var_2478_to_fp16 = const()[name = string("op_2478_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(643258112)))]; tensor linear_87_cast_fp16 = linear(bias = var_2478_to_fp16, weight = var_2477_to_fp16, x = x_199_cast_fp16)[name = string("linear_87_cast_fp16")]; tensor x_201_cast_fp16 = add(x = x_195_cast_fp16, y = linear_87_cast_fp16)[name = string("x_201_cast_fp16")]; tensor k_cache_45_begin_0 = const()[name = string("k_cache_45_begin_0"), val = tensor([11, 0, 0, 0])]; tensor k_cache_45_end_0 = const()[name = string("k_cache_45_end_0"), val = tensor([12, 1, 448, 1280])]; tensor k_cache_45_end_mask_0 = const()[name = string("k_cache_45_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_45_squeeze_mask_0 = const()[name = string("k_cache_45_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_45_cast_fp16 = slice_by_index(begin = k_cache_45_begin_0, end = k_cache_45_end_0, end_mask = k_cache_45_end_mask_0, squeeze_mask = k_cache_45_squeeze_mask_0, x = coreml_update_state_84)[name = string("k_cache_45_cast_fp16")]; tensor v_cache_45_begin_0 = const()[name = string("v_cache_45_begin_0"), val = tensor([11, 0, 0, 0])]; tensor v_cache_45_end_0 = const()[name = string("v_cache_45_end_0"), val = tensor([12, 1, 448, 1280])]; tensor v_cache_45_end_mask_0 = const()[name = string("v_cache_45_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_45_squeeze_mask_0 = const()[name = string("v_cache_45_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_45_cast_fp16 = slice_by_index(begin = v_cache_45_begin_0, end = v_cache_45_end_0, end_mask = v_cache_45_end_mask_0, squeeze_mask = v_cache_45_squeeze_mask_0, x = coreml_update_state_85)[name = string("v_cache_45_cast_fp16")]; tensor k_cache_47_begin_0 = const()[name = string("k_cache_47_begin_0"), val = tensor([11, 0, 0, 0])]; tensor k_cache_47_end_0 = const()[name = string("k_cache_47_end_0"), val = tensor([12, 1, 1500, 1280])]; tensor k_cache_47_end_mask_0 = const()[name = string("k_cache_47_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_47_squeeze_mask_0 = const()[name = string("k_cache_47_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_47_cast_fp16 = slice_by_index(begin = k_cache_47_begin_0, end = k_cache_47_end_0, end_mask = k_cache_47_end_mask_0, squeeze_mask = k_cache_47_squeeze_mask_0, x = read_state_2)[name = string("k_cache_47_cast_fp16")]; tensor v_cache_47_begin_0 = const()[name = string("v_cache_47_begin_0"), val = tensor([11, 0, 0, 0])]; tensor v_cache_47_end_0 = const()[name = string("v_cache_47_end_0"), val = tensor([12, 1, 1500, 1280])]; tensor v_cache_47_end_mask_0 = const()[name = string("v_cache_47_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_47_squeeze_mask_0 = const()[name = string("v_cache_47_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_47_cast_fp16 = slice_by_index(begin = v_cache_47_begin_0, end = v_cache_47_end_0, end_mask = v_cache_47_end_mask_0, squeeze_mask = v_cache_47_squeeze_mask_0, x = read_state_3)[name = string("v_cache_47_cast_fp16")]; int32 var_2501 = const()[name = string("op_2501"), val = int32(-1)]; tensor var_2519_axes_0 = const()[name = string("op_2519_axes_0"), val = tensor([-1])]; tensor blocks_11_attn_ln_weight_to_fp16 = const()[name = string("blocks_11_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(643260736)))]; tensor blocks_11_attn_ln_bias_to_fp16 = const()[name = string("blocks_11_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(643263360)))]; fp16 var_2507_to_fp16 = const()[name = string("op_2507_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_2519_cast_fp16 = layer_norm(axes = var_2519_axes_0, beta = blocks_11_attn_ln_bias_to_fp16, epsilon = var_2507_to_fp16, gamma = blocks_11_attn_ln_weight_to_fp16, x = x_201_cast_fp16)[name = string("op_2519_cast_fp16")]; tensor var_2530_to_fp16 = const()[name = string("op_2530_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(643265984)))]; tensor var_2531_to_fp16 = const()[name = string("op_2531_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(646542848)))]; tensor linear_88_cast_fp16 = linear(bias = var_2531_to_fp16, weight = var_2530_to_fp16, x = var_2519_cast_fp16)[name = string("linear_88_cast_fp16")]; tensor var_2534_to_fp16 = const()[name = string("op_2534_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(646545472)))]; tensor linear_89_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2534_to_fp16, x = var_2519_cast_fp16)[name = string("linear_89_cast_fp16")]; tensor var_2538_to_fp16 = const()[name = string("op_2538_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(649822336)))]; tensor var_2539_to_fp16 = const()[name = string("op_2539_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(653099200)))]; tensor linear_90_cast_fp16 = linear(bias = var_2539_to_fp16, weight = var_2538_to_fp16, x = var_2519_cast_fp16)[name = string("linear_90_cast_fp16")]; tensor var_2541_shape_cast_fp16 = shape(x = linear_88_cast_fp16)[name = string("op_2541_shape_cast_fp16")]; int32 gather_134_axis_0 = const()[name = string("gather_134_axis_0"), val = int32(0)]; int32 gather_134_batch_dims_0 = const()[name = string("gather_134_batch_dims_0"), val = int32(0)]; bool gather_134_validate_indices_0 = const()[name = string("gather_134_validate_indices_0"), val = bool(false)]; string var_2541_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2541_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_134_to_uint16 = const()[name = string("select_134_to_uint16"), val = uint16(1)]; tensor var_2541_shape_cast_fp16_to_uint16 = cast(dtype = var_2541_shape_cast_fp16_to_uint16_dtype_0, x = var_2541_shape_cast_fp16)[name = string("cast_368")]; uint16 gather_134_cast_uint16 = gather(axis = gather_134_axis_0, batch_dims = gather_134_batch_dims_0, indices = select_134_to_uint16, validate_indices = gather_134_validate_indices_0, x = var_2541_shape_cast_fp16_to_uint16)[name = string("gather_134_cast_uint16")]; string gather_134_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_134_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_134_cast_uint16_to_int32 = cast(dtype = gather_134_cast_uint16_to_int32_dtype_0, x = gather_134_cast_uint16)[name = string("cast_367")]; int32 end_step_25 = add(x = offset, y = gather_134_cast_uint16_to_int32)[name = string("end_step_25")]; tensor expand_dims_176 = const()[name = string("expand_dims_176"), val = tensor([0])]; tensor expand_dims_178 = const()[name = string("expand_dims_178"), val = tensor([0])]; tensor expand_dims_179_axes_0 = const()[name = string("expand_dims_179_axes_0"), val = tensor([0])]; tensor expand_dims_179 = expand_dims(axes = expand_dims_179_axes_0, x = end_step_25)[name = string("expand_dims_179")]; tensor concat_246_values0_0 = const()[name = string("concat_246_values0_0"), val = tensor([11])]; int32 concat_246_axis_0 = const()[name = string("concat_246_axis_0"), val = int32(0)]; bool concat_246_interleave_0 = const()[name = string("concat_246_interleave_0"), val = bool(false)]; tensor concat_246 = concat(axis = concat_246_axis_0, interleave = concat_246_interleave_0, values = (concat_246_values0_0, expand_dims_176, expand_dims_1, expand_dims_178))[name = string("concat_246")]; tensor concat_247_values0_0 = const()[name = string("concat_247_values0_0"), val = tensor([0])]; tensor concat_247_values1_0 = const()[name = string("concat_247_values1_0"), val = tensor([0])]; tensor concat_247_values3_0 = const()[name = string("concat_247_values3_0"), val = tensor([0])]; int32 concat_247_axis_0 = const()[name = string("concat_247_axis_0"), val = int32(0)]; bool concat_247_interleave_0 = const()[name = string("concat_247_interleave_0"), val = bool(false)]; tensor concat_247 = concat(axis = concat_247_axis_0, interleave = concat_247_interleave_0, values = (concat_247_values0_0, concat_247_values1_0, expand_dims_179, concat_247_values3_0))[name = string("concat_247")]; tensor k_cache1_internal_tensor_assign_12_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_12_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_12_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_246, begin_mask = k_cache1_internal_tensor_assign_12_begin_mask_0, end = concat_247, end_mask = k_cache1_internal_tensor_assign_12_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_12_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_12_stride_0, update = linear_89_cast_fp16, x = coreml_update_state_84)[name = string("k_cache1_internal_tensor_assign_12_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_12_cast_fp16, input = k_cache1)[name = string("coreml_update_state_86_write_state")]; tensor coreml_update_state_86 = read_state(input = k_cache1)[name = string("coreml_update_state_86")]; tensor v_cache1_internal_tensor_assign_12_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_12_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_12_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_246, begin_mask = v_cache1_internal_tensor_assign_12_begin_mask_0, end = concat_247, end_mask = v_cache1_internal_tensor_assign_12_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_12_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_12_stride_0, update = linear_90_cast_fp16, x = coreml_update_state_85)[name = string("v_cache1_internal_tensor_assign_12_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_12_cast_fp16, input = v_cache1)[name = string("coreml_update_state_87_write_state")]; tensor coreml_update_state_87 = read_state(input = v_cache1)[name = string("coreml_update_state_87")]; int32 concat_252_values0_0 = const()[name = string("concat_252_values0_0"), val = int32(1)]; int32 concat_252_values2_0 = const()[name = string("concat_252_values2_0"), val = int32(1280)]; int32 concat_252_axis_0 = const()[name = string("concat_252_axis_0"), val = int32(0)]; bool concat_252_interleave_0 = const()[name = string("concat_252_interleave_0"), val = bool(false)]; tensor concat_252 = concat(axis = concat_252_axis_0, interleave = concat_252_interleave_0, values = (concat_252_values0_0, end_step_25, concat_252_values2_0))[name = string("concat_252")]; tensor var_2557_begin_0 = const()[name = string("op_2557_begin_0"), val = tensor([0, 0, 0])]; tensor var_2557_end_mask_0 = const()[name = string("op_2557_end_mask_0"), val = tensor([true, false, true])]; tensor var_2557_cast_fp16 = slice_by_index(begin = var_2557_begin_0, end = concat_252, end_mask = var_2557_end_mask_0, x = k_cache_45_cast_fp16)[name = string("op_2557_cast_fp16")]; tensor var_2560_begin_0 = const()[name = string("op_2560_begin_0"), val = tensor([0, 0, 0])]; tensor var_2560_end_mask_0 = const()[name = string("op_2560_end_mask_0"), val = tensor([true, false, true])]; tensor var_2560_cast_fp16 = slice_by_index(begin = var_2560_begin_0, end = concat_252, end_mask = var_2560_end_mask_0, x = v_cache_45_cast_fp16)[name = string("op_2560_cast_fp16")]; tensor concat_254x = const()[name = string("concat_254x"), val = tensor([1, -1, 20, 64])]; tensor var_2570_cast_fp16 = reshape(shape = concat_254x, x = linear_88_cast_fp16)[name = string("op_2570_cast_fp16")]; tensor const_204_to_fp16 = const()[name = string("const_204_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_91_cast_fp16 = mul(x = var_2570_cast_fp16, y = const_204_to_fp16)[name = string("q_91_cast_fp16")]; tensor concat_255x = const()[name = string("concat_255x"), val = tensor([1, -1, 20, 64])]; tensor var_2577_cast_fp16 = reshape(shape = concat_255x, x = var_2557_cast_fp16)[name = string("op_2577_cast_fp16")]; tensor const_205_to_fp16 = const()[name = string("const_205_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_115_cast_fp16 = mul(x = var_2577_cast_fp16, y = const_205_to_fp16)[name = string("k_115_cast_fp16")]; tensor concat_256x = const()[name = string("concat_256x"), val = tensor([1, -1, 20, 64])]; tensor var_2584_cast_fp16 = reshape(shape = concat_256x, x = var_2560_cast_fp16)[name = string("op_2584_cast_fp16")]; tensor var_2585 = const()[name = string("op_2585"), val = tensor([0, 2, 1, 3])]; bool qk_67_transpose_x_0 = const()[name = string("qk_67_transpose_x_0"), val = bool(false)]; bool qk_67_transpose_y_0 = const()[name = string("qk_67_transpose_y_0"), val = bool(false)]; tensor transpose_301_perm_0 = const()[name = string("transpose_301_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_302_perm_0 = const()[name = string("transpose_302_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_302 = transpose(perm = transpose_302_perm_0, x = k_115_cast_fp16)[name = string("transpose_550")]; tensor transpose_301 = transpose(perm = transpose_301_perm_0, x = q_91_cast_fp16)[name = string("transpose_551")]; tensor qk_67_cast_fp16 = matmul(transpose_x = qk_67_transpose_x_0, transpose_y = qk_67_transpose_y_0, x = transpose_301, y = transpose_302)[name = string("qk_67_cast_fp16")]; int32 concat_257_values1_0 = const()[name = string("concat_257_values1_0"), val = int32(448)]; int32 concat_257_axis_0 = const()[name = string("concat_257_axis_0"), val = int32(0)]; bool concat_257_interleave_0 = const()[name = string("concat_257_interleave_0"), val = bool(false)]; tensor concat_257 = concat(axis = concat_257_axis_0, interleave = concat_257_interleave_0, values = (gather_134_cast_uint16_to_int32, concat_257_values1_0))[name = string("concat_257")]; tensor var_2588_begin_0 = const()[name = string("op_2588_begin_0"), val = tensor([0, 0])]; tensor var_2588_end_mask_0 = const()[name = string("op_2588_end_mask_0"), val = tensor([false, true])]; tensor var_2588_cast_fp16 = slice_by_index(begin = var_2588_begin_0, end = concat_257, end_mask = var_2588_end_mask_0, x = mask_to_fp16)[name = string("op_2588_cast_fp16")]; int32 concat_258_values0_0 = const()[name = string("concat_258_values0_0"), val = int32(0)]; int32 concat_258_axis_0 = const()[name = string("concat_258_axis_0"), val = int32(0)]; bool concat_258_interleave_0 = const()[name = string("concat_258_interleave_0"), val = bool(false)]; tensor concat_258 = concat(axis = concat_258_axis_0, interleave = concat_258_interleave_0, values = (concat_258_values0_0, gather_134_cast_uint16_to_int32))[name = string("concat_258")]; tensor var_2589_begin_0 = const()[name = string("op_2589_begin_0"), val = tensor([0, 0])]; tensor var_2589_end_mask_0 = const()[name = string("op_2589_end_mask_0"), val = tensor([true, false])]; tensor var_2589_cast_fp16 = slice_by_index(begin = var_2589_begin_0, end = concat_258, end_mask = var_2589_end_mask_0, x = var_2588_cast_fp16)[name = string("op_2589_cast_fp16")]; tensor qk_69_cast_fp16 = add(x = qk_67_cast_fp16, y = var_2589_cast_fp16)[name = string("qk_69_cast_fp16")]; tensor var_2592_cast_fp16 = softmax(axis = var_2501, x = qk_69_cast_fp16)[name = string("op_2592_cast_fp16")]; bool var_2594_transpose_x_0 = const()[name = string("op_2594_transpose_x_0"), val = bool(false)]; bool var_2594_transpose_y_0 = const()[name = string("op_2594_transpose_y_0"), val = bool(false)]; tensor v_115_cast_fp16 = transpose(perm = var_2585, x = var_2584_cast_fp16)[name = string("transpose_552")]; tensor var_2594_cast_fp16 = matmul(transpose_x = var_2594_transpose_x_0, transpose_y = var_2594_transpose_y_0, x = var_2592_cast_fp16, y = v_115_cast_fp16)[name = string("op_2594_cast_fp16")]; tensor var_2595 = const()[name = string("op_2595"), val = tensor([0, 2, 1, 3])]; tensor concat_259x = const()[name = string("concat_259x"), val = tensor([1, -1, 1280])]; tensor var_2596_cast_fp16 = transpose(perm = var_2595, x = var_2594_cast_fp16)[name = string("transpose_549")]; tensor x_205_cast_fp16 = reshape(shape = concat_259x, x = var_2596_cast_fp16)[name = string("x_205_cast_fp16")]; tensor var_2600_to_fp16 = const()[name = string("op_2600_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(653101824)))]; tensor var_2601_to_fp16 = const()[name = string("op_2601_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(656378688)))]; tensor linear_91_cast_fp16 = linear(bias = var_2601_to_fp16, weight = var_2600_to_fp16, x = x_205_cast_fp16)[name = string("linear_91_cast_fp16")]; tensor x_207_cast_fp16 = add(x = x_201_cast_fp16, y = linear_91_cast_fp16)[name = string("x_207_cast_fp16")]; tensor var_2608_axes_0 = const()[name = string("op_2608_axes_0"), val = tensor([-1])]; tensor blocks_11_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_11_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(656381312)))]; tensor blocks_11_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_11_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(656383936)))]; tensor var_2608_cast_fp16 = layer_norm(axes = var_2608_axes_0, beta = blocks_11_cross_attn_ln_bias_to_fp16, epsilon = var_2507_to_fp16, gamma = blocks_11_cross_attn_ln_weight_to_fp16, x = x_207_cast_fp16)[name = string("op_2608_cast_fp16")]; tensor var_2617_to_fp16 = const()[name = string("op_2617_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(656386560)))]; tensor var_2618_to_fp16 = const()[name = string("op_2618_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(659663424)))]; tensor linear_92_cast_fp16 = linear(bias = var_2618_to_fp16, weight = var_2617_to_fp16, x = var_2608_cast_fp16)[name = string("linear_92_cast_fp16")]; tensor concat_260 = const()[name = string("concat_260"), val = tensor([0, 0, 0])]; tensor concat_261 = const()[name = string("concat_261"), val = tensor([0, 1500, 0])]; tensor k_117_internal_tensor_assign_1_stride_0 = const()[name = string("k_117_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_117_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_117_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_117_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_117_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_117_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_117_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_117_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_260, begin_mask = k_117_internal_tensor_assign_1_begin_mask_0, end = concat_261, end_mask = k_117_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_117_internal_tensor_assign_1_squeeze_mask_0, stride = k_117_internal_tensor_assign_1_stride_0, update = k_cache_47_cast_fp16, x = k_7_to_fp16)[name = string("k_117_internal_tensor_assign_1_cast_fp16")]; tensor concat_262 = const()[name = string("concat_262"), val = tensor([0, 0, 0])]; tensor concat_263 = const()[name = string("concat_263"), val = tensor([0, 1500, 0])]; tensor v_117_internal_tensor_assign_1_stride_0 = const()[name = string("v_117_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_117_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_117_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_117_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_117_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_117_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_117_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_117_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_262, begin_mask = v_117_internal_tensor_assign_1_begin_mask_0, end = concat_263, end_mask = v_117_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_117_internal_tensor_assign_1_squeeze_mask_0, stride = v_117_internal_tensor_assign_1_stride_0, update = v_cache_47_cast_fp16, x = k_7_to_fp16)[name = string("v_117_internal_tensor_assign_1_cast_fp16")]; tensor concat_264x = const()[name = string("concat_264x"), val = tensor([1, -1, 20, 64])]; tensor var_2638_cast_fp16 = reshape(shape = concat_264x, x = linear_92_cast_fp16)[name = string("op_2638_cast_fp16")]; tensor const_206_to_fp16 = const()[name = string("const_206_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_95_cast_fp16 = mul(x = var_2638_cast_fp16, y = const_206_to_fp16)[name = string("q_95_cast_fp16")]; tensor var_2644 = const()[name = string("op_2644"), val = tensor([1, 1500, 20, -1])]; tensor var_2645_cast_fp16 = reshape(shape = var_2644, x = k_117_internal_tensor_assign_1_cast_fp16)[name = string("op_2645_cast_fp16")]; tensor const_207_to_fp16 = const()[name = string("const_207_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_119_cast_fp16 = mul(x = var_2645_cast_fp16, y = const_207_to_fp16)[name = string("k_119_cast_fp16")]; tensor var_2651 = const()[name = string("op_2651"), val = tensor([1, 1500, 20, -1])]; tensor var_2652_cast_fp16 = reshape(shape = var_2651, x = v_117_internal_tensor_assign_1_cast_fp16)[name = string("op_2652_cast_fp16")]; tensor var_2653 = const()[name = string("op_2653"), val = tensor([0, 2, 1, 3])]; bool qk_71_transpose_x_0 = const()[name = string("qk_71_transpose_x_0"), val = bool(false)]; bool qk_71_transpose_y_0 = const()[name = string("qk_71_transpose_y_0"), val = bool(false)]; tensor transpose_303_perm_0 = const()[name = string("transpose_303_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_304_perm_0 = const()[name = string("transpose_304_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_304 = transpose(perm = transpose_304_perm_0, x = k_119_cast_fp16)[name = string("transpose_546")]; tensor transpose_303 = transpose(perm = transpose_303_perm_0, x = q_95_cast_fp16)[name = string("transpose_547")]; tensor qk_71_cast_fp16 = matmul(transpose_x = qk_71_transpose_x_0, transpose_y = qk_71_transpose_y_0, x = transpose_303, y = transpose_304)[name = string("qk_71_cast_fp16")]; tensor var_2657_cast_fp16 = softmax(axis = var_2501, x = qk_71_cast_fp16)[name = string("op_2657_cast_fp16")]; bool var_2659_transpose_x_0 = const()[name = string("op_2659_transpose_x_0"), val = bool(false)]; bool var_2659_transpose_y_0 = const()[name = string("op_2659_transpose_y_0"), val = bool(false)]; tensor v_119_cast_fp16 = transpose(perm = var_2653, x = var_2652_cast_fp16)[name = string("transpose_548")]; tensor var_2659_cast_fp16 = matmul(transpose_x = var_2659_transpose_x_0, transpose_y = var_2659_transpose_y_0, x = var_2657_cast_fp16, y = v_119_cast_fp16)[name = string("op_2659_cast_fp16")]; tensor var_2660 = const()[name = string("op_2660"), val = tensor([0, 2, 1, 3])]; tensor concat_265x = const()[name = string("concat_265x"), val = tensor([1, -1, 1280])]; tensor var_2661_cast_fp16 = transpose(perm = var_2660, x = var_2659_cast_fp16)[name = string("transpose_545")]; tensor x_211_cast_fp16 = reshape(shape = concat_265x, x = var_2661_cast_fp16)[name = string("x_211_cast_fp16")]; tensor var_2665_to_fp16 = const()[name = string("op_2665_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(659666048)))]; tensor var_2666_to_fp16 = const()[name = string("op_2666_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(662942912)))]; tensor linear_93_cast_fp16 = linear(bias = var_2666_to_fp16, weight = var_2665_to_fp16, x = x_211_cast_fp16)[name = string("linear_93_cast_fp16")]; tensor x_213_cast_fp16 = add(x = x_207_cast_fp16, y = linear_93_cast_fp16)[name = string("x_213_cast_fp16")]; tensor var_2673_axes_0 = const()[name = string("op_2673_axes_0"), val = tensor([-1])]; tensor blocks_11_mlp_ln_weight_to_fp16 = const()[name = string("blocks_11_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(662945536)))]; tensor blocks_11_mlp_ln_bias_to_fp16 = const()[name = string("blocks_11_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(662948160)))]; tensor var_2673_cast_fp16 = layer_norm(axes = var_2673_axes_0, beta = blocks_11_mlp_ln_bias_to_fp16, epsilon = var_2507_to_fp16, gamma = blocks_11_mlp_ln_weight_to_fp16, x = x_213_cast_fp16)[name = string("op_2673_cast_fp16")]; tensor var_2682_to_fp16 = const()[name = string("op_2682_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(662950784)))]; tensor var_2683_to_fp16 = const()[name = string("op_2683_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(676058048)))]; tensor linear_94_cast_fp16 = linear(bias = var_2683_to_fp16, weight = var_2682_to_fp16, x = var_2673_cast_fp16)[name = string("linear_94_cast_fp16")]; string x_217_mode_0 = const()[name = string("x_217_mode_0"), val = string("EXACT")]; tensor x_217_cast_fp16 = gelu(mode = x_217_mode_0, x = linear_94_cast_fp16)[name = string("x_217_cast_fp16")]; tensor var_2688_to_fp16 = const()[name = string("op_2688_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(676068352)))]; tensor var_2689_to_fp16 = const()[name = string("op_2689_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(689175616)))]; tensor linear_95_cast_fp16 = linear(bias = var_2689_to_fp16, weight = var_2688_to_fp16, x = x_217_cast_fp16)[name = string("linear_95_cast_fp16")]; tensor x_219_cast_fp16 = add(x = x_213_cast_fp16, y = linear_95_cast_fp16)[name = string("x_219_cast_fp16")]; tensor k_cache_49_begin_0 = const()[name = string("k_cache_49_begin_0"), val = tensor([12, 0, 0, 0])]; tensor k_cache_49_end_0 = const()[name = string("k_cache_49_end_0"), val = tensor([13, 1, 448, 1280])]; tensor k_cache_49_end_mask_0 = const()[name = string("k_cache_49_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_49_squeeze_mask_0 = const()[name = string("k_cache_49_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_49_cast_fp16 = slice_by_index(begin = k_cache_49_begin_0, end = k_cache_49_end_0, end_mask = k_cache_49_end_mask_0, squeeze_mask = k_cache_49_squeeze_mask_0, x = coreml_update_state_86)[name = string("k_cache_49_cast_fp16")]; tensor v_cache_49_begin_0 = const()[name = string("v_cache_49_begin_0"), val = tensor([12, 0, 0, 0])]; tensor v_cache_49_end_0 = const()[name = string("v_cache_49_end_0"), val = tensor([13, 1, 448, 1280])]; tensor v_cache_49_end_mask_0 = const()[name = string("v_cache_49_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_49_squeeze_mask_0 = const()[name = string("v_cache_49_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_49_cast_fp16 = slice_by_index(begin = v_cache_49_begin_0, end = v_cache_49_end_0, end_mask = v_cache_49_end_mask_0, squeeze_mask = v_cache_49_squeeze_mask_0, x = coreml_update_state_87)[name = string("v_cache_49_cast_fp16")]; tensor k_cache_51_begin_0 = const()[name = string("k_cache_51_begin_0"), val = tensor([12, 0, 0, 0])]; tensor k_cache_51_end_0 = const()[name = string("k_cache_51_end_0"), val = tensor([13, 1, 1500, 1280])]; tensor k_cache_51_end_mask_0 = const()[name = string("k_cache_51_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_51_squeeze_mask_0 = const()[name = string("k_cache_51_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_51_cast_fp16 = slice_by_index(begin = k_cache_51_begin_0, end = k_cache_51_end_0, end_mask = k_cache_51_end_mask_0, squeeze_mask = k_cache_51_squeeze_mask_0, x = read_state_2)[name = string("k_cache_51_cast_fp16")]; tensor v_cache_51_begin_0 = const()[name = string("v_cache_51_begin_0"), val = tensor([12, 0, 0, 0])]; tensor v_cache_51_end_0 = const()[name = string("v_cache_51_end_0"), val = tensor([13, 1, 1500, 1280])]; tensor v_cache_51_end_mask_0 = const()[name = string("v_cache_51_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_51_squeeze_mask_0 = const()[name = string("v_cache_51_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_51_cast_fp16 = slice_by_index(begin = v_cache_51_begin_0, end = v_cache_51_end_0, end_mask = v_cache_51_end_mask_0, squeeze_mask = v_cache_51_squeeze_mask_0, x = read_state_3)[name = string("v_cache_51_cast_fp16")]; int32 var_2712 = const()[name = string("op_2712"), val = int32(-1)]; tensor var_2730_axes_0 = const()[name = string("op_2730_axes_0"), val = tensor([-1])]; tensor blocks_12_attn_ln_weight_to_fp16 = const()[name = string("blocks_12_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(689178240)))]; tensor blocks_12_attn_ln_bias_to_fp16 = const()[name = string("blocks_12_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(689180864)))]; fp16 var_2718_to_fp16 = const()[name = string("op_2718_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_2730_cast_fp16 = layer_norm(axes = var_2730_axes_0, beta = blocks_12_attn_ln_bias_to_fp16, epsilon = var_2718_to_fp16, gamma = blocks_12_attn_ln_weight_to_fp16, x = x_219_cast_fp16)[name = string("op_2730_cast_fp16")]; tensor var_2741_to_fp16 = const()[name = string("op_2741_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(689183488)))]; tensor var_2742_to_fp16 = const()[name = string("op_2742_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(692460352)))]; tensor linear_96_cast_fp16 = linear(bias = var_2742_to_fp16, weight = var_2741_to_fp16, x = var_2730_cast_fp16)[name = string("linear_96_cast_fp16")]; tensor var_2745_to_fp16 = const()[name = string("op_2745_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(692462976)))]; tensor linear_97_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2745_to_fp16, x = var_2730_cast_fp16)[name = string("linear_97_cast_fp16")]; tensor var_2749_to_fp16 = const()[name = string("op_2749_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(695739840)))]; tensor var_2750_to_fp16 = const()[name = string("op_2750_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(699016704)))]; tensor linear_98_cast_fp16 = linear(bias = var_2750_to_fp16, weight = var_2749_to_fp16, x = var_2730_cast_fp16)[name = string("linear_98_cast_fp16")]; tensor var_2752_shape_cast_fp16 = shape(x = linear_96_cast_fp16)[name = string("op_2752_shape_cast_fp16")]; int32 gather_146_axis_0 = const()[name = string("gather_146_axis_0"), val = int32(0)]; int32 gather_146_batch_dims_0 = const()[name = string("gather_146_batch_dims_0"), val = int32(0)]; bool gather_146_validate_indices_0 = const()[name = string("gather_146_validate_indices_0"), val = bool(false)]; string var_2752_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2752_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_146_to_uint16 = const()[name = string("select_146_to_uint16"), val = uint16(1)]; tensor var_2752_shape_cast_fp16_to_uint16 = cast(dtype = var_2752_shape_cast_fp16_to_uint16_dtype_0, x = var_2752_shape_cast_fp16)[name = string("cast_366")]; uint16 gather_146_cast_uint16 = gather(axis = gather_146_axis_0, batch_dims = gather_146_batch_dims_0, indices = select_146_to_uint16, validate_indices = gather_146_validate_indices_0, x = var_2752_shape_cast_fp16_to_uint16)[name = string("gather_146_cast_uint16")]; string gather_146_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_146_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_146_cast_uint16_to_int32 = cast(dtype = gather_146_cast_uint16_to_int32_dtype_0, x = gather_146_cast_uint16)[name = string("cast_365")]; int32 end_step_27 = add(x = offset, y = gather_146_cast_uint16_to_int32)[name = string("end_step_27")]; tensor expand_dims_192 = const()[name = string("expand_dims_192"), val = tensor([0])]; tensor expand_dims_194 = const()[name = string("expand_dims_194"), val = tensor([0])]; tensor expand_dims_195_axes_0 = const()[name = string("expand_dims_195_axes_0"), val = tensor([0])]; tensor expand_dims_195 = expand_dims(axes = expand_dims_195_axes_0, x = end_step_27)[name = string("expand_dims_195")]; tensor concat_268_values0_0 = const()[name = string("concat_268_values0_0"), val = tensor([12])]; int32 concat_268_axis_0 = const()[name = string("concat_268_axis_0"), val = int32(0)]; bool concat_268_interleave_0 = const()[name = string("concat_268_interleave_0"), val = bool(false)]; tensor concat_268 = concat(axis = concat_268_axis_0, interleave = concat_268_interleave_0, values = (concat_268_values0_0, expand_dims_192, expand_dims_1, expand_dims_194))[name = string("concat_268")]; tensor concat_269_values0_0 = const()[name = string("concat_269_values0_0"), val = tensor([0])]; tensor concat_269_values1_0 = const()[name = string("concat_269_values1_0"), val = tensor([0])]; tensor concat_269_values3_0 = const()[name = string("concat_269_values3_0"), val = tensor([0])]; int32 concat_269_axis_0 = const()[name = string("concat_269_axis_0"), val = int32(0)]; bool concat_269_interleave_0 = const()[name = string("concat_269_interleave_0"), val = bool(false)]; tensor concat_269 = concat(axis = concat_269_axis_0, interleave = concat_269_interleave_0, values = (concat_269_values0_0, concat_269_values1_0, expand_dims_195, concat_269_values3_0))[name = string("concat_269")]; tensor k_cache1_internal_tensor_assign_13_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_13_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_13_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_268, begin_mask = k_cache1_internal_tensor_assign_13_begin_mask_0, end = concat_269, end_mask = k_cache1_internal_tensor_assign_13_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_13_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_13_stride_0, update = linear_97_cast_fp16, x = coreml_update_state_86)[name = string("k_cache1_internal_tensor_assign_13_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_13_cast_fp16, input = k_cache1)[name = string("coreml_update_state_88_write_state")]; tensor coreml_update_state_88 = read_state(input = k_cache1)[name = string("coreml_update_state_88")]; tensor v_cache1_internal_tensor_assign_13_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_13_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_13_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_268, begin_mask = v_cache1_internal_tensor_assign_13_begin_mask_0, end = concat_269, end_mask = v_cache1_internal_tensor_assign_13_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_13_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_13_stride_0, update = linear_98_cast_fp16, x = coreml_update_state_87)[name = string("v_cache1_internal_tensor_assign_13_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_13_cast_fp16, input = v_cache1)[name = string("coreml_update_state_89_write_state")]; tensor coreml_update_state_89 = read_state(input = v_cache1)[name = string("coreml_update_state_89")]; int32 concat_274_values0_0 = const()[name = string("concat_274_values0_0"), val = int32(1)]; int32 concat_274_values2_0 = const()[name = string("concat_274_values2_0"), val = int32(1280)]; int32 concat_274_axis_0 = const()[name = string("concat_274_axis_0"), val = int32(0)]; bool concat_274_interleave_0 = const()[name = string("concat_274_interleave_0"), val = bool(false)]; tensor concat_274 = concat(axis = concat_274_axis_0, interleave = concat_274_interleave_0, values = (concat_274_values0_0, end_step_27, concat_274_values2_0))[name = string("concat_274")]; tensor var_2768_begin_0 = const()[name = string("op_2768_begin_0"), val = tensor([0, 0, 0])]; tensor var_2768_end_mask_0 = const()[name = string("op_2768_end_mask_0"), val = tensor([true, false, true])]; tensor var_2768_cast_fp16 = slice_by_index(begin = var_2768_begin_0, end = concat_274, end_mask = var_2768_end_mask_0, x = k_cache_49_cast_fp16)[name = string("op_2768_cast_fp16")]; tensor var_2771_begin_0 = const()[name = string("op_2771_begin_0"), val = tensor([0, 0, 0])]; tensor var_2771_end_mask_0 = const()[name = string("op_2771_end_mask_0"), val = tensor([true, false, true])]; tensor var_2771_cast_fp16 = slice_by_index(begin = var_2771_begin_0, end = concat_274, end_mask = var_2771_end_mask_0, x = v_cache_49_cast_fp16)[name = string("op_2771_cast_fp16")]; tensor concat_276x = const()[name = string("concat_276x"), val = tensor([1, -1, 20, 64])]; tensor var_2781_cast_fp16 = reshape(shape = concat_276x, x = linear_96_cast_fp16)[name = string("op_2781_cast_fp16")]; tensor const_208_to_fp16 = const()[name = string("const_208_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_99_cast_fp16 = mul(x = var_2781_cast_fp16, y = const_208_to_fp16)[name = string("q_99_cast_fp16")]; tensor concat_277x = const()[name = string("concat_277x"), val = tensor([1, -1, 20, 64])]; tensor var_2788_cast_fp16 = reshape(shape = concat_277x, x = var_2768_cast_fp16)[name = string("op_2788_cast_fp16")]; tensor const_209_to_fp16 = const()[name = string("const_209_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_125_cast_fp16 = mul(x = var_2788_cast_fp16, y = const_209_to_fp16)[name = string("k_125_cast_fp16")]; tensor concat_278x = const()[name = string("concat_278x"), val = tensor([1, -1, 20, 64])]; tensor var_2795_cast_fp16 = reshape(shape = concat_278x, x = var_2771_cast_fp16)[name = string("op_2795_cast_fp16")]; tensor var_2796 = const()[name = string("op_2796"), val = tensor([0, 2, 1, 3])]; bool qk_73_transpose_x_0 = const()[name = string("qk_73_transpose_x_0"), val = bool(false)]; bool qk_73_transpose_y_0 = const()[name = string("qk_73_transpose_y_0"), val = bool(false)]; tensor transpose_305_perm_0 = const()[name = string("transpose_305_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_306_perm_0 = const()[name = string("transpose_306_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_306 = transpose(perm = transpose_306_perm_0, x = k_125_cast_fp16)[name = string("transpose_542")]; tensor transpose_305 = transpose(perm = transpose_305_perm_0, x = q_99_cast_fp16)[name = string("transpose_543")]; tensor qk_73_cast_fp16 = matmul(transpose_x = qk_73_transpose_x_0, transpose_y = qk_73_transpose_y_0, x = transpose_305, y = transpose_306)[name = string("qk_73_cast_fp16")]; int32 concat_279_values1_0 = const()[name = string("concat_279_values1_0"), val = int32(448)]; int32 concat_279_axis_0 = const()[name = string("concat_279_axis_0"), val = int32(0)]; bool concat_279_interleave_0 = const()[name = string("concat_279_interleave_0"), val = bool(false)]; tensor concat_279 = concat(axis = concat_279_axis_0, interleave = concat_279_interleave_0, values = (gather_146_cast_uint16_to_int32, concat_279_values1_0))[name = string("concat_279")]; tensor var_2799_begin_0 = const()[name = string("op_2799_begin_0"), val = tensor([0, 0])]; tensor var_2799_end_mask_0 = const()[name = string("op_2799_end_mask_0"), val = tensor([false, true])]; tensor var_2799_cast_fp16 = slice_by_index(begin = var_2799_begin_0, end = concat_279, end_mask = var_2799_end_mask_0, x = mask_to_fp16)[name = string("op_2799_cast_fp16")]; int32 concat_280_values0_0 = const()[name = string("concat_280_values0_0"), val = int32(0)]; int32 concat_280_axis_0 = const()[name = string("concat_280_axis_0"), val = int32(0)]; bool concat_280_interleave_0 = const()[name = string("concat_280_interleave_0"), val = bool(false)]; tensor concat_280 = concat(axis = concat_280_axis_0, interleave = concat_280_interleave_0, values = (concat_280_values0_0, gather_146_cast_uint16_to_int32))[name = string("concat_280")]; tensor var_2800_begin_0 = const()[name = string("op_2800_begin_0"), val = tensor([0, 0])]; tensor var_2800_end_mask_0 = const()[name = string("op_2800_end_mask_0"), val = tensor([true, false])]; tensor var_2800_cast_fp16 = slice_by_index(begin = var_2800_begin_0, end = concat_280, end_mask = var_2800_end_mask_0, x = var_2799_cast_fp16)[name = string("op_2800_cast_fp16")]; tensor qk_75_cast_fp16 = add(x = qk_73_cast_fp16, y = var_2800_cast_fp16)[name = string("qk_75_cast_fp16")]; tensor var_2803_cast_fp16 = softmax(axis = var_2712, x = qk_75_cast_fp16)[name = string("op_2803_cast_fp16")]; bool var_2805_transpose_x_0 = const()[name = string("op_2805_transpose_x_0"), val = bool(false)]; bool var_2805_transpose_y_0 = const()[name = string("op_2805_transpose_y_0"), val = bool(false)]; tensor v_125_cast_fp16 = transpose(perm = var_2796, x = var_2795_cast_fp16)[name = string("transpose_544")]; tensor var_2805_cast_fp16 = matmul(transpose_x = var_2805_transpose_x_0, transpose_y = var_2805_transpose_y_0, x = var_2803_cast_fp16, y = v_125_cast_fp16)[name = string("op_2805_cast_fp16")]; tensor var_2806 = const()[name = string("op_2806"), val = tensor([0, 2, 1, 3])]; tensor concat_281x = const()[name = string("concat_281x"), val = tensor([1, -1, 1280])]; tensor var_2807_cast_fp16 = transpose(perm = var_2806, x = var_2805_cast_fp16)[name = string("transpose_541")]; tensor x_223_cast_fp16 = reshape(shape = concat_281x, x = var_2807_cast_fp16)[name = string("x_223_cast_fp16")]; tensor var_2811_to_fp16 = const()[name = string("op_2811_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(699019328)))]; tensor var_2812_to_fp16 = const()[name = string("op_2812_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(702296192)))]; tensor linear_99_cast_fp16 = linear(bias = var_2812_to_fp16, weight = var_2811_to_fp16, x = x_223_cast_fp16)[name = string("linear_99_cast_fp16")]; tensor x_225_cast_fp16 = add(x = x_219_cast_fp16, y = linear_99_cast_fp16)[name = string("x_225_cast_fp16")]; tensor var_2819_axes_0 = const()[name = string("op_2819_axes_0"), val = tensor([-1])]; tensor blocks_12_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_12_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(702298816)))]; tensor blocks_12_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_12_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(702301440)))]; tensor var_2819_cast_fp16 = layer_norm(axes = var_2819_axes_0, beta = blocks_12_cross_attn_ln_bias_to_fp16, epsilon = var_2718_to_fp16, gamma = blocks_12_cross_attn_ln_weight_to_fp16, x = x_225_cast_fp16)[name = string("op_2819_cast_fp16")]; tensor var_2828_to_fp16 = const()[name = string("op_2828_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(702304064)))]; tensor var_2829_to_fp16 = const()[name = string("op_2829_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(705580928)))]; tensor linear_100_cast_fp16 = linear(bias = var_2829_to_fp16, weight = var_2828_to_fp16, x = var_2819_cast_fp16)[name = string("linear_100_cast_fp16")]; tensor concat_282 = const()[name = string("concat_282"), val = tensor([0, 0, 0])]; tensor concat_283 = const()[name = string("concat_283"), val = tensor([0, 1500, 0])]; tensor k_127_internal_tensor_assign_1_stride_0 = const()[name = string("k_127_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_127_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_127_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_127_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_127_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_127_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_127_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_127_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_282, begin_mask = k_127_internal_tensor_assign_1_begin_mask_0, end = concat_283, end_mask = k_127_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_127_internal_tensor_assign_1_squeeze_mask_0, stride = k_127_internal_tensor_assign_1_stride_0, update = k_cache_51_cast_fp16, x = k_7_to_fp16)[name = string("k_127_internal_tensor_assign_1_cast_fp16")]; tensor concat_284 = const()[name = string("concat_284"), val = tensor([0, 0, 0])]; tensor concat_285 = const()[name = string("concat_285"), val = tensor([0, 1500, 0])]; tensor v_127_internal_tensor_assign_1_stride_0 = const()[name = string("v_127_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_127_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_127_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_127_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_127_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_127_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_127_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_127_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_284, begin_mask = v_127_internal_tensor_assign_1_begin_mask_0, end = concat_285, end_mask = v_127_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_127_internal_tensor_assign_1_squeeze_mask_0, stride = v_127_internal_tensor_assign_1_stride_0, update = v_cache_51_cast_fp16, x = k_7_to_fp16)[name = string("v_127_internal_tensor_assign_1_cast_fp16")]; tensor concat_286x = const()[name = string("concat_286x"), val = tensor([1, -1, 20, 64])]; tensor var_2849_cast_fp16 = reshape(shape = concat_286x, x = linear_100_cast_fp16)[name = string("op_2849_cast_fp16")]; tensor const_210_to_fp16 = const()[name = string("const_210_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_103_cast_fp16 = mul(x = var_2849_cast_fp16, y = const_210_to_fp16)[name = string("q_103_cast_fp16")]; tensor var_2855 = const()[name = string("op_2855"), val = tensor([1, 1500, 20, -1])]; tensor var_2856_cast_fp16 = reshape(shape = var_2855, x = k_127_internal_tensor_assign_1_cast_fp16)[name = string("op_2856_cast_fp16")]; tensor const_211_to_fp16 = const()[name = string("const_211_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_129_cast_fp16 = mul(x = var_2856_cast_fp16, y = const_211_to_fp16)[name = string("k_129_cast_fp16")]; tensor var_2862 = const()[name = string("op_2862"), val = tensor([1, 1500, 20, -1])]; tensor var_2863_cast_fp16 = reshape(shape = var_2862, x = v_127_internal_tensor_assign_1_cast_fp16)[name = string("op_2863_cast_fp16")]; tensor var_2864 = const()[name = string("op_2864"), val = tensor([0, 2, 1, 3])]; bool qk_77_transpose_x_0 = const()[name = string("qk_77_transpose_x_0"), val = bool(false)]; bool qk_77_transpose_y_0 = const()[name = string("qk_77_transpose_y_0"), val = bool(false)]; tensor transpose_307_perm_0 = const()[name = string("transpose_307_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_308_perm_0 = const()[name = string("transpose_308_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_308 = transpose(perm = transpose_308_perm_0, x = k_129_cast_fp16)[name = string("transpose_538")]; tensor transpose_307 = transpose(perm = transpose_307_perm_0, x = q_103_cast_fp16)[name = string("transpose_539")]; tensor qk_77_cast_fp16 = matmul(transpose_x = qk_77_transpose_x_0, transpose_y = qk_77_transpose_y_0, x = transpose_307, y = transpose_308)[name = string("qk_77_cast_fp16")]; tensor var_2868_cast_fp16 = softmax(axis = var_2712, x = qk_77_cast_fp16)[name = string("op_2868_cast_fp16")]; bool var_2870_transpose_x_0 = const()[name = string("op_2870_transpose_x_0"), val = bool(false)]; bool var_2870_transpose_y_0 = const()[name = string("op_2870_transpose_y_0"), val = bool(false)]; tensor v_129_cast_fp16 = transpose(perm = var_2864, x = var_2863_cast_fp16)[name = string("transpose_540")]; tensor var_2870_cast_fp16 = matmul(transpose_x = var_2870_transpose_x_0, transpose_y = var_2870_transpose_y_0, x = var_2868_cast_fp16, y = v_129_cast_fp16)[name = string("op_2870_cast_fp16")]; tensor var_2871 = const()[name = string("op_2871"), val = tensor([0, 2, 1, 3])]; tensor concat_287x = const()[name = string("concat_287x"), val = tensor([1, -1, 1280])]; tensor var_2872_cast_fp16 = transpose(perm = var_2871, x = var_2870_cast_fp16)[name = string("transpose_537")]; tensor x_229_cast_fp16 = reshape(shape = concat_287x, x = var_2872_cast_fp16)[name = string("x_229_cast_fp16")]; tensor var_2876_to_fp16 = const()[name = string("op_2876_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(705583552)))]; tensor var_2877_to_fp16 = const()[name = string("op_2877_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(708860416)))]; tensor linear_101_cast_fp16 = linear(bias = var_2877_to_fp16, weight = var_2876_to_fp16, x = x_229_cast_fp16)[name = string("linear_101_cast_fp16")]; tensor x_231_cast_fp16 = add(x = x_225_cast_fp16, y = linear_101_cast_fp16)[name = string("x_231_cast_fp16")]; tensor var_2884_axes_0 = const()[name = string("op_2884_axes_0"), val = tensor([-1])]; tensor blocks_12_mlp_ln_weight_to_fp16 = const()[name = string("blocks_12_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(708863040)))]; tensor blocks_12_mlp_ln_bias_to_fp16 = const()[name = string("blocks_12_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(708865664)))]; tensor var_2884_cast_fp16 = layer_norm(axes = var_2884_axes_0, beta = blocks_12_mlp_ln_bias_to_fp16, epsilon = var_2718_to_fp16, gamma = blocks_12_mlp_ln_weight_to_fp16, x = x_231_cast_fp16)[name = string("op_2884_cast_fp16")]; tensor var_2893_to_fp16 = const()[name = string("op_2893_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(708868288)))]; tensor var_2894_to_fp16 = const()[name = string("op_2894_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(721975552)))]; tensor linear_102_cast_fp16 = linear(bias = var_2894_to_fp16, weight = var_2893_to_fp16, x = var_2884_cast_fp16)[name = string("linear_102_cast_fp16")]; string x_235_mode_0 = const()[name = string("x_235_mode_0"), val = string("EXACT")]; tensor x_235_cast_fp16 = gelu(mode = x_235_mode_0, x = linear_102_cast_fp16)[name = string("x_235_cast_fp16")]; tensor var_2899_to_fp16 = const()[name = string("op_2899_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(721985856)))]; tensor var_2900_to_fp16 = const()[name = string("op_2900_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(735093120)))]; tensor linear_103_cast_fp16 = linear(bias = var_2900_to_fp16, weight = var_2899_to_fp16, x = x_235_cast_fp16)[name = string("linear_103_cast_fp16")]; tensor x_237_cast_fp16 = add(x = x_231_cast_fp16, y = linear_103_cast_fp16)[name = string("x_237_cast_fp16")]; tensor k_cache_53_begin_0 = const()[name = string("k_cache_53_begin_0"), val = tensor([13, 0, 0, 0])]; tensor k_cache_53_end_0 = const()[name = string("k_cache_53_end_0"), val = tensor([14, 1, 448, 1280])]; tensor k_cache_53_end_mask_0 = const()[name = string("k_cache_53_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_53_squeeze_mask_0 = const()[name = string("k_cache_53_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_53_cast_fp16 = slice_by_index(begin = k_cache_53_begin_0, end = k_cache_53_end_0, end_mask = k_cache_53_end_mask_0, squeeze_mask = k_cache_53_squeeze_mask_0, x = coreml_update_state_88)[name = string("k_cache_53_cast_fp16")]; tensor v_cache_53_begin_0 = const()[name = string("v_cache_53_begin_0"), val = tensor([13, 0, 0, 0])]; tensor v_cache_53_end_0 = const()[name = string("v_cache_53_end_0"), val = tensor([14, 1, 448, 1280])]; tensor v_cache_53_end_mask_0 = const()[name = string("v_cache_53_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_53_squeeze_mask_0 = const()[name = string("v_cache_53_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_53_cast_fp16 = slice_by_index(begin = v_cache_53_begin_0, end = v_cache_53_end_0, end_mask = v_cache_53_end_mask_0, squeeze_mask = v_cache_53_squeeze_mask_0, x = coreml_update_state_89)[name = string("v_cache_53_cast_fp16")]; tensor k_cache_55_begin_0 = const()[name = string("k_cache_55_begin_0"), val = tensor([13, 0, 0, 0])]; tensor k_cache_55_end_0 = const()[name = string("k_cache_55_end_0"), val = tensor([14, 1, 1500, 1280])]; tensor k_cache_55_end_mask_0 = const()[name = string("k_cache_55_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_55_squeeze_mask_0 = const()[name = string("k_cache_55_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_55_cast_fp16 = slice_by_index(begin = k_cache_55_begin_0, end = k_cache_55_end_0, end_mask = k_cache_55_end_mask_0, squeeze_mask = k_cache_55_squeeze_mask_0, x = read_state_2)[name = string("k_cache_55_cast_fp16")]; tensor v_cache_55_begin_0 = const()[name = string("v_cache_55_begin_0"), val = tensor([13, 0, 0, 0])]; tensor v_cache_55_end_0 = const()[name = string("v_cache_55_end_0"), val = tensor([14, 1, 1500, 1280])]; tensor v_cache_55_end_mask_0 = const()[name = string("v_cache_55_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_55_squeeze_mask_0 = const()[name = string("v_cache_55_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_55_cast_fp16 = slice_by_index(begin = v_cache_55_begin_0, end = v_cache_55_end_0, end_mask = v_cache_55_end_mask_0, squeeze_mask = v_cache_55_squeeze_mask_0, x = read_state_3)[name = string("v_cache_55_cast_fp16")]; int32 var_2923 = const()[name = string("op_2923"), val = int32(-1)]; tensor var_2941_axes_0 = const()[name = string("op_2941_axes_0"), val = tensor([-1])]; tensor blocks_13_attn_ln_weight_to_fp16 = const()[name = string("blocks_13_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(735095744)))]; tensor blocks_13_attn_ln_bias_to_fp16 = const()[name = string("blocks_13_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(735098368)))]; fp16 var_2929_to_fp16 = const()[name = string("op_2929_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_2941_cast_fp16 = layer_norm(axes = var_2941_axes_0, beta = blocks_13_attn_ln_bias_to_fp16, epsilon = var_2929_to_fp16, gamma = blocks_13_attn_ln_weight_to_fp16, x = x_237_cast_fp16)[name = string("op_2941_cast_fp16")]; tensor var_2952_to_fp16 = const()[name = string("op_2952_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(735100992)))]; tensor var_2953_to_fp16 = const()[name = string("op_2953_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(738377856)))]; tensor linear_104_cast_fp16 = linear(bias = var_2953_to_fp16, weight = var_2952_to_fp16, x = var_2941_cast_fp16)[name = string("linear_104_cast_fp16")]; tensor var_2956_to_fp16 = const()[name = string("op_2956_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(738380480)))]; tensor linear_105_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2956_to_fp16, x = var_2941_cast_fp16)[name = string("linear_105_cast_fp16")]; tensor var_2960_to_fp16 = const()[name = string("op_2960_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(741657344)))]; tensor var_2961_to_fp16 = const()[name = string("op_2961_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(744934208)))]; tensor linear_106_cast_fp16 = linear(bias = var_2961_to_fp16, weight = var_2960_to_fp16, x = var_2941_cast_fp16)[name = string("linear_106_cast_fp16")]; tensor var_2963_shape_cast_fp16 = shape(x = linear_104_cast_fp16)[name = string("op_2963_shape_cast_fp16")]; int32 gather_158_axis_0 = const()[name = string("gather_158_axis_0"), val = int32(0)]; int32 gather_158_batch_dims_0 = const()[name = string("gather_158_batch_dims_0"), val = int32(0)]; bool gather_158_validate_indices_0 = const()[name = string("gather_158_validate_indices_0"), val = bool(false)]; string var_2963_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2963_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_158_to_uint16 = const()[name = string("select_158_to_uint16"), val = uint16(1)]; tensor var_2963_shape_cast_fp16_to_uint16 = cast(dtype = var_2963_shape_cast_fp16_to_uint16_dtype_0, x = var_2963_shape_cast_fp16)[name = string("cast_364")]; uint16 gather_158_cast_uint16 = gather(axis = gather_158_axis_0, batch_dims = gather_158_batch_dims_0, indices = select_158_to_uint16, validate_indices = gather_158_validate_indices_0, x = var_2963_shape_cast_fp16_to_uint16)[name = string("gather_158_cast_uint16")]; string gather_158_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_158_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_158_cast_uint16_to_int32 = cast(dtype = gather_158_cast_uint16_to_int32_dtype_0, x = gather_158_cast_uint16)[name = string("cast_363")]; int32 end_step_29 = add(x = offset, y = gather_158_cast_uint16_to_int32)[name = string("end_step_29")]; tensor expand_dims_208 = const()[name = string("expand_dims_208"), val = tensor([0])]; tensor expand_dims_210 = const()[name = string("expand_dims_210"), val = tensor([0])]; tensor expand_dims_211_axes_0 = const()[name = string("expand_dims_211_axes_0"), val = tensor([0])]; tensor expand_dims_211 = expand_dims(axes = expand_dims_211_axes_0, x = end_step_29)[name = string("expand_dims_211")]; tensor concat_290_values0_0 = const()[name = string("concat_290_values0_0"), val = tensor([13])]; int32 concat_290_axis_0 = const()[name = string("concat_290_axis_0"), val = int32(0)]; bool concat_290_interleave_0 = const()[name = string("concat_290_interleave_0"), val = bool(false)]; tensor concat_290 = concat(axis = concat_290_axis_0, interleave = concat_290_interleave_0, values = (concat_290_values0_0, expand_dims_208, expand_dims_1, expand_dims_210))[name = string("concat_290")]; tensor concat_291_values0_0 = const()[name = string("concat_291_values0_0"), val = tensor([0])]; tensor concat_291_values1_0 = const()[name = string("concat_291_values1_0"), val = tensor([0])]; tensor concat_291_values3_0 = const()[name = string("concat_291_values3_0"), val = tensor([0])]; int32 concat_291_axis_0 = const()[name = string("concat_291_axis_0"), val = int32(0)]; bool concat_291_interleave_0 = const()[name = string("concat_291_interleave_0"), val = bool(false)]; tensor concat_291 = concat(axis = concat_291_axis_0, interleave = concat_291_interleave_0, values = (concat_291_values0_0, concat_291_values1_0, expand_dims_211, concat_291_values3_0))[name = string("concat_291")]; tensor k_cache1_internal_tensor_assign_14_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_14_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_14_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_290, begin_mask = k_cache1_internal_tensor_assign_14_begin_mask_0, end = concat_291, end_mask = k_cache1_internal_tensor_assign_14_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_14_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_14_stride_0, update = linear_105_cast_fp16, x = coreml_update_state_88)[name = string("k_cache1_internal_tensor_assign_14_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_14_cast_fp16, input = k_cache1)[name = string("coreml_update_state_90_write_state")]; tensor coreml_update_state_90 = read_state(input = k_cache1)[name = string("coreml_update_state_90")]; tensor v_cache1_internal_tensor_assign_14_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_14_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_14_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_290, begin_mask = v_cache1_internal_tensor_assign_14_begin_mask_0, end = concat_291, end_mask = v_cache1_internal_tensor_assign_14_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_14_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_14_stride_0, update = linear_106_cast_fp16, x = coreml_update_state_89)[name = string("v_cache1_internal_tensor_assign_14_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_14_cast_fp16, input = v_cache1)[name = string("coreml_update_state_91_write_state")]; tensor coreml_update_state_91 = read_state(input = v_cache1)[name = string("coreml_update_state_91")]; int32 concat_296_values0_0 = const()[name = string("concat_296_values0_0"), val = int32(1)]; int32 concat_296_values2_0 = const()[name = string("concat_296_values2_0"), val = int32(1280)]; int32 concat_296_axis_0 = const()[name = string("concat_296_axis_0"), val = int32(0)]; bool concat_296_interleave_0 = const()[name = string("concat_296_interleave_0"), val = bool(false)]; tensor concat_296 = concat(axis = concat_296_axis_0, interleave = concat_296_interleave_0, values = (concat_296_values0_0, end_step_29, concat_296_values2_0))[name = string("concat_296")]; tensor var_2979_begin_0 = const()[name = string("op_2979_begin_0"), val = tensor([0, 0, 0])]; tensor var_2979_end_mask_0 = const()[name = string("op_2979_end_mask_0"), val = tensor([true, false, true])]; tensor var_2979_cast_fp16 = slice_by_index(begin = var_2979_begin_0, end = concat_296, end_mask = var_2979_end_mask_0, x = k_cache_53_cast_fp16)[name = string("op_2979_cast_fp16")]; tensor var_2982_begin_0 = const()[name = string("op_2982_begin_0"), val = tensor([0, 0, 0])]; tensor var_2982_end_mask_0 = const()[name = string("op_2982_end_mask_0"), val = tensor([true, false, true])]; tensor var_2982_cast_fp16 = slice_by_index(begin = var_2982_begin_0, end = concat_296, end_mask = var_2982_end_mask_0, x = v_cache_53_cast_fp16)[name = string("op_2982_cast_fp16")]; tensor concat_298x = const()[name = string("concat_298x"), val = tensor([1, -1, 20, 64])]; tensor var_2992_cast_fp16 = reshape(shape = concat_298x, x = linear_104_cast_fp16)[name = string("op_2992_cast_fp16")]; tensor const_212_to_fp16 = const()[name = string("const_212_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_107_cast_fp16 = mul(x = var_2992_cast_fp16, y = const_212_to_fp16)[name = string("q_107_cast_fp16")]; tensor concat_299x = const()[name = string("concat_299x"), val = tensor([1, -1, 20, 64])]; tensor var_2999_cast_fp16 = reshape(shape = concat_299x, x = var_2979_cast_fp16)[name = string("op_2999_cast_fp16")]; tensor const_213_to_fp16 = const()[name = string("const_213_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_135_cast_fp16 = mul(x = var_2999_cast_fp16, y = const_213_to_fp16)[name = string("k_135_cast_fp16")]; tensor concat_300x = const()[name = string("concat_300x"), val = tensor([1, -1, 20, 64])]; tensor var_3006_cast_fp16 = reshape(shape = concat_300x, x = var_2982_cast_fp16)[name = string("op_3006_cast_fp16")]; tensor var_3007 = const()[name = string("op_3007"), val = tensor([0, 2, 1, 3])]; bool qk_79_transpose_x_0 = const()[name = string("qk_79_transpose_x_0"), val = bool(false)]; bool qk_79_transpose_y_0 = const()[name = string("qk_79_transpose_y_0"), val = bool(false)]; tensor transpose_309_perm_0 = const()[name = string("transpose_309_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_310_perm_0 = const()[name = string("transpose_310_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_310 = transpose(perm = transpose_310_perm_0, x = k_135_cast_fp16)[name = string("transpose_534")]; tensor transpose_309 = transpose(perm = transpose_309_perm_0, x = q_107_cast_fp16)[name = string("transpose_535")]; tensor qk_79_cast_fp16 = matmul(transpose_x = qk_79_transpose_x_0, transpose_y = qk_79_transpose_y_0, x = transpose_309, y = transpose_310)[name = string("qk_79_cast_fp16")]; int32 concat_301_values1_0 = const()[name = string("concat_301_values1_0"), val = int32(448)]; int32 concat_301_axis_0 = const()[name = string("concat_301_axis_0"), val = int32(0)]; bool concat_301_interleave_0 = const()[name = string("concat_301_interleave_0"), val = bool(false)]; tensor concat_301 = concat(axis = concat_301_axis_0, interleave = concat_301_interleave_0, values = (gather_158_cast_uint16_to_int32, concat_301_values1_0))[name = string("concat_301")]; tensor var_3010_begin_0 = const()[name = string("op_3010_begin_0"), val = tensor([0, 0])]; tensor var_3010_end_mask_0 = const()[name = string("op_3010_end_mask_0"), val = tensor([false, true])]; tensor var_3010_cast_fp16 = slice_by_index(begin = var_3010_begin_0, end = concat_301, end_mask = var_3010_end_mask_0, x = mask_to_fp16)[name = string("op_3010_cast_fp16")]; int32 concat_302_values0_0 = const()[name = string("concat_302_values0_0"), val = int32(0)]; int32 concat_302_axis_0 = const()[name = string("concat_302_axis_0"), val = int32(0)]; bool concat_302_interleave_0 = const()[name = string("concat_302_interleave_0"), val = bool(false)]; tensor concat_302 = concat(axis = concat_302_axis_0, interleave = concat_302_interleave_0, values = (concat_302_values0_0, gather_158_cast_uint16_to_int32))[name = string("concat_302")]; tensor var_3011_begin_0 = const()[name = string("op_3011_begin_0"), val = tensor([0, 0])]; tensor var_3011_end_mask_0 = const()[name = string("op_3011_end_mask_0"), val = tensor([true, false])]; tensor var_3011_cast_fp16 = slice_by_index(begin = var_3011_begin_0, end = concat_302, end_mask = var_3011_end_mask_0, x = var_3010_cast_fp16)[name = string("op_3011_cast_fp16")]; tensor qk_81_cast_fp16 = add(x = qk_79_cast_fp16, y = var_3011_cast_fp16)[name = string("qk_81_cast_fp16")]; tensor var_3014_cast_fp16 = softmax(axis = var_2923, x = qk_81_cast_fp16)[name = string("op_3014_cast_fp16")]; bool var_3016_transpose_x_0 = const()[name = string("op_3016_transpose_x_0"), val = bool(false)]; bool var_3016_transpose_y_0 = const()[name = string("op_3016_transpose_y_0"), val = bool(false)]; tensor v_135_cast_fp16 = transpose(perm = var_3007, x = var_3006_cast_fp16)[name = string("transpose_536")]; tensor var_3016_cast_fp16 = matmul(transpose_x = var_3016_transpose_x_0, transpose_y = var_3016_transpose_y_0, x = var_3014_cast_fp16, y = v_135_cast_fp16)[name = string("op_3016_cast_fp16")]; tensor var_3017 = const()[name = string("op_3017"), val = tensor([0, 2, 1, 3])]; tensor concat_303x = const()[name = string("concat_303x"), val = tensor([1, -1, 1280])]; tensor var_3018_cast_fp16 = transpose(perm = var_3017, x = var_3016_cast_fp16)[name = string("transpose_533")]; tensor x_241_cast_fp16 = reshape(shape = concat_303x, x = var_3018_cast_fp16)[name = string("x_241_cast_fp16")]; tensor var_3022_to_fp16 = const()[name = string("op_3022_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(744936832)))]; tensor var_3023_to_fp16 = const()[name = string("op_3023_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(748213696)))]; tensor linear_107_cast_fp16 = linear(bias = var_3023_to_fp16, weight = var_3022_to_fp16, x = x_241_cast_fp16)[name = string("linear_107_cast_fp16")]; tensor x_243_cast_fp16 = add(x = x_237_cast_fp16, y = linear_107_cast_fp16)[name = string("x_243_cast_fp16")]; tensor var_3030_axes_0 = const()[name = string("op_3030_axes_0"), val = tensor([-1])]; tensor blocks_13_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_13_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(748216320)))]; tensor blocks_13_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_13_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(748218944)))]; tensor var_3030_cast_fp16 = layer_norm(axes = var_3030_axes_0, beta = blocks_13_cross_attn_ln_bias_to_fp16, epsilon = var_2929_to_fp16, gamma = blocks_13_cross_attn_ln_weight_to_fp16, x = x_243_cast_fp16)[name = string("op_3030_cast_fp16")]; tensor var_3039_to_fp16 = const()[name = string("op_3039_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(748221568)))]; tensor var_3040_to_fp16 = const()[name = string("op_3040_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(751498432)))]; tensor linear_108_cast_fp16 = linear(bias = var_3040_to_fp16, weight = var_3039_to_fp16, x = var_3030_cast_fp16)[name = string("linear_108_cast_fp16")]; tensor concat_304 = const()[name = string("concat_304"), val = tensor([0, 0, 0])]; tensor concat_305 = const()[name = string("concat_305"), val = tensor([0, 1500, 0])]; tensor k_137_internal_tensor_assign_1_stride_0 = const()[name = string("k_137_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_137_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_137_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_137_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_137_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_137_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_137_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_137_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_304, begin_mask = k_137_internal_tensor_assign_1_begin_mask_0, end = concat_305, end_mask = k_137_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_137_internal_tensor_assign_1_squeeze_mask_0, stride = k_137_internal_tensor_assign_1_stride_0, update = k_cache_55_cast_fp16, x = k_7_to_fp16)[name = string("k_137_internal_tensor_assign_1_cast_fp16")]; tensor concat_306 = const()[name = string("concat_306"), val = tensor([0, 0, 0])]; tensor concat_307 = const()[name = string("concat_307"), val = tensor([0, 1500, 0])]; tensor v_137_internal_tensor_assign_1_stride_0 = const()[name = string("v_137_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_137_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_137_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_137_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_137_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_137_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_137_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_137_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_306, begin_mask = v_137_internal_tensor_assign_1_begin_mask_0, end = concat_307, end_mask = v_137_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_137_internal_tensor_assign_1_squeeze_mask_0, stride = v_137_internal_tensor_assign_1_stride_0, update = v_cache_55_cast_fp16, x = k_7_to_fp16)[name = string("v_137_internal_tensor_assign_1_cast_fp16")]; tensor concat_308x = const()[name = string("concat_308x"), val = tensor([1, -1, 20, 64])]; tensor var_3060_cast_fp16 = reshape(shape = concat_308x, x = linear_108_cast_fp16)[name = string("op_3060_cast_fp16")]; tensor const_214_to_fp16 = const()[name = string("const_214_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_111_cast_fp16 = mul(x = var_3060_cast_fp16, y = const_214_to_fp16)[name = string("q_111_cast_fp16")]; tensor var_3066 = const()[name = string("op_3066"), val = tensor([1, 1500, 20, -1])]; tensor var_3067_cast_fp16 = reshape(shape = var_3066, x = k_137_internal_tensor_assign_1_cast_fp16)[name = string("op_3067_cast_fp16")]; tensor const_215_to_fp16 = const()[name = string("const_215_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_139_cast_fp16 = mul(x = var_3067_cast_fp16, y = const_215_to_fp16)[name = string("k_139_cast_fp16")]; tensor var_3073 = const()[name = string("op_3073"), val = tensor([1, 1500, 20, -1])]; tensor var_3074_cast_fp16 = reshape(shape = var_3073, x = v_137_internal_tensor_assign_1_cast_fp16)[name = string("op_3074_cast_fp16")]; tensor var_3075 = const()[name = string("op_3075"), val = tensor([0, 2, 1, 3])]; bool qk_83_transpose_x_0 = const()[name = string("qk_83_transpose_x_0"), val = bool(false)]; bool qk_83_transpose_y_0 = const()[name = string("qk_83_transpose_y_0"), val = bool(false)]; tensor transpose_311_perm_0 = const()[name = string("transpose_311_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_312_perm_0 = const()[name = string("transpose_312_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_312 = transpose(perm = transpose_312_perm_0, x = k_139_cast_fp16)[name = string("transpose_530")]; tensor transpose_311 = transpose(perm = transpose_311_perm_0, x = q_111_cast_fp16)[name = string("transpose_531")]; tensor qk_83_cast_fp16 = matmul(transpose_x = qk_83_transpose_x_0, transpose_y = qk_83_transpose_y_0, x = transpose_311, y = transpose_312)[name = string("qk_83_cast_fp16")]; tensor var_3079_cast_fp16 = softmax(axis = var_2923, x = qk_83_cast_fp16)[name = string("op_3079_cast_fp16")]; bool var_3081_transpose_x_0 = const()[name = string("op_3081_transpose_x_0"), val = bool(false)]; bool var_3081_transpose_y_0 = const()[name = string("op_3081_transpose_y_0"), val = bool(false)]; tensor v_139_cast_fp16 = transpose(perm = var_3075, x = var_3074_cast_fp16)[name = string("transpose_532")]; tensor var_3081_cast_fp16 = matmul(transpose_x = var_3081_transpose_x_0, transpose_y = var_3081_transpose_y_0, x = var_3079_cast_fp16, y = v_139_cast_fp16)[name = string("op_3081_cast_fp16")]; tensor var_3082 = const()[name = string("op_3082"), val = tensor([0, 2, 1, 3])]; tensor concat_309x = const()[name = string("concat_309x"), val = tensor([1, -1, 1280])]; tensor var_3083_cast_fp16 = transpose(perm = var_3082, x = var_3081_cast_fp16)[name = string("transpose_529")]; tensor x_247_cast_fp16 = reshape(shape = concat_309x, x = var_3083_cast_fp16)[name = string("x_247_cast_fp16")]; tensor var_3087_to_fp16 = const()[name = string("op_3087_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(751501056)))]; tensor var_3088_to_fp16 = const()[name = string("op_3088_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(754777920)))]; tensor linear_109_cast_fp16 = linear(bias = var_3088_to_fp16, weight = var_3087_to_fp16, x = x_247_cast_fp16)[name = string("linear_109_cast_fp16")]; tensor x_249_cast_fp16 = add(x = x_243_cast_fp16, y = linear_109_cast_fp16)[name = string("x_249_cast_fp16")]; tensor var_3095_axes_0 = const()[name = string("op_3095_axes_0"), val = tensor([-1])]; tensor blocks_13_mlp_ln_weight_to_fp16 = const()[name = string("blocks_13_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(754780544)))]; tensor blocks_13_mlp_ln_bias_to_fp16 = const()[name = string("blocks_13_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(754783168)))]; tensor var_3095_cast_fp16 = layer_norm(axes = var_3095_axes_0, beta = blocks_13_mlp_ln_bias_to_fp16, epsilon = var_2929_to_fp16, gamma = blocks_13_mlp_ln_weight_to_fp16, x = x_249_cast_fp16)[name = string("op_3095_cast_fp16")]; tensor var_3104_to_fp16 = const()[name = string("op_3104_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(754785792)))]; tensor var_3105_to_fp16 = const()[name = string("op_3105_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(767893056)))]; tensor linear_110_cast_fp16 = linear(bias = var_3105_to_fp16, weight = var_3104_to_fp16, x = var_3095_cast_fp16)[name = string("linear_110_cast_fp16")]; string x_253_mode_0 = const()[name = string("x_253_mode_0"), val = string("EXACT")]; tensor x_253_cast_fp16 = gelu(mode = x_253_mode_0, x = linear_110_cast_fp16)[name = string("x_253_cast_fp16")]; tensor var_3110_to_fp16 = const()[name = string("op_3110_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(767903360)))]; tensor var_3111_to_fp16 = const()[name = string("op_3111_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(781010624)))]; tensor linear_111_cast_fp16 = linear(bias = var_3111_to_fp16, weight = var_3110_to_fp16, x = x_253_cast_fp16)[name = string("linear_111_cast_fp16")]; tensor x_255_cast_fp16 = add(x = x_249_cast_fp16, y = linear_111_cast_fp16)[name = string("x_255_cast_fp16")]; tensor k_cache_57_begin_0 = const()[name = string("k_cache_57_begin_0"), val = tensor([14, 0, 0, 0])]; tensor k_cache_57_end_0 = const()[name = string("k_cache_57_end_0"), val = tensor([15, 1, 448, 1280])]; tensor k_cache_57_end_mask_0 = const()[name = string("k_cache_57_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_57_squeeze_mask_0 = const()[name = string("k_cache_57_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_57_cast_fp16 = slice_by_index(begin = k_cache_57_begin_0, end = k_cache_57_end_0, end_mask = k_cache_57_end_mask_0, squeeze_mask = k_cache_57_squeeze_mask_0, x = coreml_update_state_90)[name = string("k_cache_57_cast_fp16")]; tensor v_cache_57_begin_0 = const()[name = string("v_cache_57_begin_0"), val = tensor([14, 0, 0, 0])]; tensor v_cache_57_end_0 = const()[name = string("v_cache_57_end_0"), val = tensor([15, 1, 448, 1280])]; tensor v_cache_57_end_mask_0 = const()[name = string("v_cache_57_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_57_squeeze_mask_0 = const()[name = string("v_cache_57_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_57_cast_fp16 = slice_by_index(begin = v_cache_57_begin_0, end = v_cache_57_end_0, end_mask = v_cache_57_end_mask_0, squeeze_mask = v_cache_57_squeeze_mask_0, x = coreml_update_state_91)[name = string("v_cache_57_cast_fp16")]; tensor k_cache_59_begin_0 = const()[name = string("k_cache_59_begin_0"), val = tensor([14, 0, 0, 0])]; tensor k_cache_59_end_0 = const()[name = string("k_cache_59_end_0"), val = tensor([15, 1, 1500, 1280])]; tensor k_cache_59_end_mask_0 = const()[name = string("k_cache_59_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_59_squeeze_mask_0 = const()[name = string("k_cache_59_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_59_cast_fp16 = slice_by_index(begin = k_cache_59_begin_0, end = k_cache_59_end_0, end_mask = k_cache_59_end_mask_0, squeeze_mask = k_cache_59_squeeze_mask_0, x = read_state_2)[name = string("k_cache_59_cast_fp16")]; tensor v_cache_59_begin_0 = const()[name = string("v_cache_59_begin_0"), val = tensor([14, 0, 0, 0])]; tensor v_cache_59_end_0 = const()[name = string("v_cache_59_end_0"), val = tensor([15, 1, 1500, 1280])]; tensor v_cache_59_end_mask_0 = const()[name = string("v_cache_59_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_59_squeeze_mask_0 = const()[name = string("v_cache_59_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_59_cast_fp16 = slice_by_index(begin = v_cache_59_begin_0, end = v_cache_59_end_0, end_mask = v_cache_59_end_mask_0, squeeze_mask = v_cache_59_squeeze_mask_0, x = read_state_3)[name = string("v_cache_59_cast_fp16")]; int32 var_3134 = const()[name = string("op_3134"), val = int32(-1)]; tensor var_3152_axes_0 = const()[name = string("op_3152_axes_0"), val = tensor([-1])]; tensor blocks_14_attn_ln_weight_to_fp16 = const()[name = string("blocks_14_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(781013248)))]; tensor blocks_14_attn_ln_bias_to_fp16 = const()[name = string("blocks_14_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(781015872)))]; fp16 var_3140_to_fp16 = const()[name = string("op_3140_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_3152_cast_fp16 = layer_norm(axes = var_3152_axes_0, beta = blocks_14_attn_ln_bias_to_fp16, epsilon = var_3140_to_fp16, gamma = blocks_14_attn_ln_weight_to_fp16, x = x_255_cast_fp16)[name = string("op_3152_cast_fp16")]; tensor var_3163_to_fp16 = const()[name = string("op_3163_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(781018496)))]; tensor var_3164_to_fp16 = const()[name = string("op_3164_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(784295360)))]; tensor linear_112_cast_fp16 = linear(bias = var_3164_to_fp16, weight = var_3163_to_fp16, x = var_3152_cast_fp16)[name = string("linear_112_cast_fp16")]; tensor var_3167_to_fp16 = const()[name = string("op_3167_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(784297984)))]; tensor linear_113_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3167_to_fp16, x = var_3152_cast_fp16)[name = string("linear_113_cast_fp16")]; tensor var_3171_to_fp16 = const()[name = string("op_3171_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(787574848)))]; tensor var_3172_to_fp16 = const()[name = string("op_3172_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(790851712)))]; tensor linear_114_cast_fp16 = linear(bias = var_3172_to_fp16, weight = var_3171_to_fp16, x = var_3152_cast_fp16)[name = string("linear_114_cast_fp16")]; tensor var_3174_shape_cast_fp16 = shape(x = linear_112_cast_fp16)[name = string("op_3174_shape_cast_fp16")]; int32 gather_170_axis_0 = const()[name = string("gather_170_axis_0"), val = int32(0)]; int32 gather_170_batch_dims_0 = const()[name = string("gather_170_batch_dims_0"), val = int32(0)]; bool gather_170_validate_indices_0 = const()[name = string("gather_170_validate_indices_0"), val = bool(false)]; string var_3174_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3174_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_170_to_uint16 = const()[name = string("select_170_to_uint16"), val = uint16(1)]; tensor var_3174_shape_cast_fp16_to_uint16 = cast(dtype = var_3174_shape_cast_fp16_to_uint16_dtype_0, x = var_3174_shape_cast_fp16)[name = string("cast_362")]; uint16 gather_170_cast_uint16 = gather(axis = gather_170_axis_0, batch_dims = gather_170_batch_dims_0, indices = select_170_to_uint16, validate_indices = gather_170_validate_indices_0, x = var_3174_shape_cast_fp16_to_uint16)[name = string("gather_170_cast_uint16")]; string gather_170_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_170_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_170_cast_uint16_to_int32 = cast(dtype = gather_170_cast_uint16_to_int32_dtype_0, x = gather_170_cast_uint16)[name = string("cast_361")]; int32 end_step_31 = add(x = offset, y = gather_170_cast_uint16_to_int32)[name = string("end_step_31")]; tensor expand_dims_224 = const()[name = string("expand_dims_224"), val = tensor([0])]; tensor expand_dims_226 = const()[name = string("expand_dims_226"), val = tensor([0])]; tensor expand_dims_227_axes_0 = const()[name = string("expand_dims_227_axes_0"), val = tensor([0])]; tensor expand_dims_227 = expand_dims(axes = expand_dims_227_axes_0, x = end_step_31)[name = string("expand_dims_227")]; tensor concat_312_values0_0 = const()[name = string("concat_312_values0_0"), val = tensor([14])]; int32 concat_312_axis_0 = const()[name = string("concat_312_axis_0"), val = int32(0)]; bool concat_312_interleave_0 = const()[name = string("concat_312_interleave_0"), val = bool(false)]; tensor concat_312 = concat(axis = concat_312_axis_0, interleave = concat_312_interleave_0, values = (concat_312_values0_0, expand_dims_224, expand_dims_1, expand_dims_226))[name = string("concat_312")]; tensor concat_313_values0_0 = const()[name = string("concat_313_values0_0"), val = tensor([0])]; tensor concat_313_values1_0 = const()[name = string("concat_313_values1_0"), val = tensor([0])]; tensor concat_313_values3_0 = const()[name = string("concat_313_values3_0"), val = tensor([0])]; int32 concat_313_axis_0 = const()[name = string("concat_313_axis_0"), val = int32(0)]; bool concat_313_interleave_0 = const()[name = string("concat_313_interleave_0"), val = bool(false)]; tensor concat_313 = concat(axis = concat_313_axis_0, interleave = concat_313_interleave_0, values = (concat_313_values0_0, concat_313_values1_0, expand_dims_227, concat_313_values3_0))[name = string("concat_313")]; tensor k_cache1_internal_tensor_assign_15_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_15_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_15_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_312, begin_mask = k_cache1_internal_tensor_assign_15_begin_mask_0, end = concat_313, end_mask = k_cache1_internal_tensor_assign_15_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_15_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_15_stride_0, update = linear_113_cast_fp16, x = coreml_update_state_90)[name = string("k_cache1_internal_tensor_assign_15_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_15_cast_fp16, input = k_cache1)[name = string("coreml_update_state_92_write_state")]; tensor coreml_update_state_92 = read_state(input = k_cache1)[name = string("coreml_update_state_92")]; tensor v_cache1_internal_tensor_assign_15_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_15_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_15_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_312, begin_mask = v_cache1_internal_tensor_assign_15_begin_mask_0, end = concat_313, end_mask = v_cache1_internal_tensor_assign_15_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_15_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_15_stride_0, update = linear_114_cast_fp16, x = coreml_update_state_91)[name = string("v_cache1_internal_tensor_assign_15_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_15_cast_fp16, input = v_cache1)[name = string("coreml_update_state_93_write_state")]; tensor coreml_update_state_93 = read_state(input = v_cache1)[name = string("coreml_update_state_93")]; int32 concat_318_values0_0 = const()[name = string("concat_318_values0_0"), val = int32(1)]; int32 concat_318_values2_0 = const()[name = string("concat_318_values2_0"), val = int32(1280)]; int32 concat_318_axis_0 = const()[name = string("concat_318_axis_0"), val = int32(0)]; bool concat_318_interleave_0 = const()[name = string("concat_318_interleave_0"), val = bool(false)]; tensor concat_318 = concat(axis = concat_318_axis_0, interleave = concat_318_interleave_0, values = (concat_318_values0_0, end_step_31, concat_318_values2_0))[name = string("concat_318")]; tensor var_3190_begin_0 = const()[name = string("op_3190_begin_0"), val = tensor([0, 0, 0])]; tensor var_3190_end_mask_0 = const()[name = string("op_3190_end_mask_0"), val = tensor([true, false, true])]; tensor var_3190_cast_fp16 = slice_by_index(begin = var_3190_begin_0, end = concat_318, end_mask = var_3190_end_mask_0, x = k_cache_57_cast_fp16)[name = string("op_3190_cast_fp16")]; tensor var_3193_begin_0 = const()[name = string("op_3193_begin_0"), val = tensor([0, 0, 0])]; tensor var_3193_end_mask_0 = const()[name = string("op_3193_end_mask_0"), val = tensor([true, false, true])]; tensor var_3193_cast_fp16 = slice_by_index(begin = var_3193_begin_0, end = concat_318, end_mask = var_3193_end_mask_0, x = v_cache_57_cast_fp16)[name = string("op_3193_cast_fp16")]; tensor concat_320x = const()[name = string("concat_320x"), val = tensor([1, -1, 20, 64])]; tensor var_3203_cast_fp16 = reshape(shape = concat_320x, x = linear_112_cast_fp16)[name = string("op_3203_cast_fp16")]; tensor const_216_to_fp16 = const()[name = string("const_216_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_115_cast_fp16 = mul(x = var_3203_cast_fp16, y = const_216_to_fp16)[name = string("q_115_cast_fp16")]; tensor concat_321x = const()[name = string("concat_321x"), val = tensor([1, -1, 20, 64])]; tensor var_3210_cast_fp16 = reshape(shape = concat_321x, x = var_3190_cast_fp16)[name = string("op_3210_cast_fp16")]; tensor const_217_to_fp16 = const()[name = string("const_217_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_145_cast_fp16 = mul(x = var_3210_cast_fp16, y = const_217_to_fp16)[name = string("k_145_cast_fp16")]; tensor concat_322x = const()[name = string("concat_322x"), val = tensor([1, -1, 20, 64])]; tensor var_3217_cast_fp16 = reshape(shape = concat_322x, x = var_3193_cast_fp16)[name = string("op_3217_cast_fp16")]; tensor var_3218 = const()[name = string("op_3218"), val = tensor([0, 2, 1, 3])]; bool qk_85_transpose_x_0 = const()[name = string("qk_85_transpose_x_0"), val = bool(false)]; bool qk_85_transpose_y_0 = const()[name = string("qk_85_transpose_y_0"), val = bool(false)]; tensor transpose_313_perm_0 = const()[name = string("transpose_313_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_314_perm_0 = const()[name = string("transpose_314_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_314 = transpose(perm = transpose_314_perm_0, x = k_145_cast_fp16)[name = string("transpose_526")]; tensor transpose_313 = transpose(perm = transpose_313_perm_0, x = q_115_cast_fp16)[name = string("transpose_527")]; tensor qk_85_cast_fp16 = matmul(transpose_x = qk_85_transpose_x_0, transpose_y = qk_85_transpose_y_0, x = transpose_313, y = transpose_314)[name = string("qk_85_cast_fp16")]; int32 concat_323_values1_0 = const()[name = string("concat_323_values1_0"), val = int32(448)]; int32 concat_323_axis_0 = const()[name = string("concat_323_axis_0"), val = int32(0)]; bool concat_323_interleave_0 = const()[name = string("concat_323_interleave_0"), val = bool(false)]; tensor concat_323 = concat(axis = concat_323_axis_0, interleave = concat_323_interleave_0, values = (gather_170_cast_uint16_to_int32, concat_323_values1_0))[name = string("concat_323")]; tensor var_3221_begin_0 = const()[name = string("op_3221_begin_0"), val = tensor([0, 0])]; tensor var_3221_end_mask_0 = const()[name = string("op_3221_end_mask_0"), val = tensor([false, true])]; tensor var_3221_cast_fp16 = slice_by_index(begin = var_3221_begin_0, end = concat_323, end_mask = var_3221_end_mask_0, x = mask_to_fp16)[name = string("op_3221_cast_fp16")]; int32 concat_324_values0_0 = const()[name = string("concat_324_values0_0"), val = int32(0)]; int32 concat_324_axis_0 = const()[name = string("concat_324_axis_0"), val = int32(0)]; bool concat_324_interleave_0 = const()[name = string("concat_324_interleave_0"), val = bool(false)]; tensor concat_324 = concat(axis = concat_324_axis_0, interleave = concat_324_interleave_0, values = (concat_324_values0_0, gather_170_cast_uint16_to_int32))[name = string("concat_324")]; tensor var_3222_begin_0 = const()[name = string("op_3222_begin_0"), val = tensor([0, 0])]; tensor var_3222_end_mask_0 = const()[name = string("op_3222_end_mask_0"), val = tensor([true, false])]; tensor var_3222_cast_fp16 = slice_by_index(begin = var_3222_begin_0, end = concat_324, end_mask = var_3222_end_mask_0, x = var_3221_cast_fp16)[name = string("op_3222_cast_fp16")]; tensor qk_87_cast_fp16 = add(x = qk_85_cast_fp16, y = var_3222_cast_fp16)[name = string("qk_87_cast_fp16")]; tensor var_3225_cast_fp16 = softmax(axis = var_3134, x = qk_87_cast_fp16)[name = string("op_3225_cast_fp16")]; bool var_3227_transpose_x_0 = const()[name = string("op_3227_transpose_x_0"), val = bool(false)]; bool var_3227_transpose_y_0 = const()[name = string("op_3227_transpose_y_0"), val = bool(false)]; tensor v_145_cast_fp16 = transpose(perm = var_3218, x = var_3217_cast_fp16)[name = string("transpose_528")]; tensor var_3227_cast_fp16 = matmul(transpose_x = var_3227_transpose_x_0, transpose_y = var_3227_transpose_y_0, x = var_3225_cast_fp16, y = v_145_cast_fp16)[name = string("op_3227_cast_fp16")]; tensor var_3228 = const()[name = string("op_3228"), val = tensor([0, 2, 1, 3])]; tensor concat_325x = const()[name = string("concat_325x"), val = tensor([1, -1, 1280])]; tensor var_3229_cast_fp16 = transpose(perm = var_3228, x = var_3227_cast_fp16)[name = string("transpose_525")]; tensor x_259_cast_fp16 = reshape(shape = concat_325x, x = var_3229_cast_fp16)[name = string("x_259_cast_fp16")]; tensor var_3233_to_fp16 = const()[name = string("op_3233_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(790854336)))]; tensor var_3234_to_fp16 = const()[name = string("op_3234_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(794131200)))]; tensor linear_115_cast_fp16 = linear(bias = var_3234_to_fp16, weight = var_3233_to_fp16, x = x_259_cast_fp16)[name = string("linear_115_cast_fp16")]; tensor x_261_cast_fp16 = add(x = x_255_cast_fp16, y = linear_115_cast_fp16)[name = string("x_261_cast_fp16")]; tensor var_3241_axes_0 = const()[name = string("op_3241_axes_0"), val = tensor([-1])]; tensor blocks_14_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_14_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(794133824)))]; tensor blocks_14_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_14_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(794136448)))]; tensor var_3241_cast_fp16 = layer_norm(axes = var_3241_axes_0, beta = blocks_14_cross_attn_ln_bias_to_fp16, epsilon = var_3140_to_fp16, gamma = blocks_14_cross_attn_ln_weight_to_fp16, x = x_261_cast_fp16)[name = string("op_3241_cast_fp16")]; tensor var_3250_to_fp16 = const()[name = string("op_3250_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(794139072)))]; tensor var_3251_to_fp16 = const()[name = string("op_3251_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(797415936)))]; tensor linear_116_cast_fp16 = linear(bias = var_3251_to_fp16, weight = var_3250_to_fp16, x = var_3241_cast_fp16)[name = string("linear_116_cast_fp16")]; tensor concat_326 = const()[name = string("concat_326"), val = tensor([0, 0, 0])]; tensor concat_327 = const()[name = string("concat_327"), val = tensor([0, 1500, 0])]; tensor k_147_internal_tensor_assign_1_stride_0 = const()[name = string("k_147_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_147_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_147_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_147_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_147_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_147_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_147_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_147_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_326, begin_mask = k_147_internal_tensor_assign_1_begin_mask_0, end = concat_327, end_mask = k_147_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_147_internal_tensor_assign_1_squeeze_mask_0, stride = k_147_internal_tensor_assign_1_stride_0, update = k_cache_59_cast_fp16, x = k_7_to_fp16)[name = string("k_147_internal_tensor_assign_1_cast_fp16")]; tensor concat_328 = const()[name = string("concat_328"), val = tensor([0, 0, 0])]; tensor concat_329 = const()[name = string("concat_329"), val = tensor([0, 1500, 0])]; tensor v_147_internal_tensor_assign_1_stride_0 = const()[name = string("v_147_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_147_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_147_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_147_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_147_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_147_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_147_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_147_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_328, begin_mask = v_147_internal_tensor_assign_1_begin_mask_0, end = concat_329, end_mask = v_147_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_147_internal_tensor_assign_1_squeeze_mask_0, stride = v_147_internal_tensor_assign_1_stride_0, update = v_cache_59_cast_fp16, x = k_7_to_fp16)[name = string("v_147_internal_tensor_assign_1_cast_fp16")]; tensor concat_330x = const()[name = string("concat_330x"), val = tensor([1, -1, 20, 64])]; tensor var_3271_cast_fp16 = reshape(shape = concat_330x, x = linear_116_cast_fp16)[name = string("op_3271_cast_fp16")]; tensor const_218_to_fp16 = const()[name = string("const_218_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_119_cast_fp16 = mul(x = var_3271_cast_fp16, y = const_218_to_fp16)[name = string("q_119_cast_fp16")]; tensor var_3277 = const()[name = string("op_3277"), val = tensor([1, 1500, 20, -1])]; tensor var_3278_cast_fp16 = reshape(shape = var_3277, x = k_147_internal_tensor_assign_1_cast_fp16)[name = string("op_3278_cast_fp16")]; tensor const_219_to_fp16 = const()[name = string("const_219_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_149_cast_fp16 = mul(x = var_3278_cast_fp16, y = const_219_to_fp16)[name = string("k_149_cast_fp16")]; tensor var_3284 = const()[name = string("op_3284"), val = tensor([1, 1500, 20, -1])]; tensor var_3285_cast_fp16 = reshape(shape = var_3284, x = v_147_internal_tensor_assign_1_cast_fp16)[name = string("op_3285_cast_fp16")]; tensor var_3286 = const()[name = string("op_3286"), val = tensor([0, 2, 1, 3])]; bool qk_89_transpose_x_0 = const()[name = string("qk_89_transpose_x_0"), val = bool(false)]; bool qk_89_transpose_y_0 = const()[name = string("qk_89_transpose_y_0"), val = bool(false)]; tensor transpose_315_perm_0 = const()[name = string("transpose_315_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_316_perm_0 = const()[name = string("transpose_316_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_316 = transpose(perm = transpose_316_perm_0, x = k_149_cast_fp16)[name = string("transpose_522")]; tensor transpose_315 = transpose(perm = transpose_315_perm_0, x = q_119_cast_fp16)[name = string("transpose_523")]; tensor qk_89_cast_fp16 = matmul(transpose_x = qk_89_transpose_x_0, transpose_y = qk_89_transpose_y_0, x = transpose_315, y = transpose_316)[name = string("qk_89_cast_fp16")]; tensor var_3290_cast_fp16 = softmax(axis = var_3134, x = qk_89_cast_fp16)[name = string("op_3290_cast_fp16")]; bool var_3292_transpose_x_0 = const()[name = string("op_3292_transpose_x_0"), val = bool(false)]; bool var_3292_transpose_y_0 = const()[name = string("op_3292_transpose_y_0"), val = bool(false)]; tensor v_149_cast_fp16 = transpose(perm = var_3286, x = var_3285_cast_fp16)[name = string("transpose_524")]; tensor var_3292_cast_fp16 = matmul(transpose_x = var_3292_transpose_x_0, transpose_y = var_3292_transpose_y_0, x = var_3290_cast_fp16, y = v_149_cast_fp16)[name = string("op_3292_cast_fp16")]; tensor var_3293 = const()[name = string("op_3293"), val = tensor([0, 2, 1, 3])]; tensor concat_331x = const()[name = string("concat_331x"), val = tensor([1, -1, 1280])]; tensor var_3294_cast_fp16 = transpose(perm = var_3293, x = var_3292_cast_fp16)[name = string("transpose_521")]; tensor x_265_cast_fp16 = reshape(shape = concat_331x, x = var_3294_cast_fp16)[name = string("x_265_cast_fp16")]; tensor var_3298_to_fp16 = const()[name = string("op_3298_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(797418560)))]; tensor var_3299_to_fp16 = const()[name = string("op_3299_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(800695424)))]; tensor linear_117_cast_fp16 = linear(bias = var_3299_to_fp16, weight = var_3298_to_fp16, x = x_265_cast_fp16)[name = string("linear_117_cast_fp16")]; tensor x_267_cast_fp16 = add(x = x_261_cast_fp16, y = linear_117_cast_fp16)[name = string("x_267_cast_fp16")]; tensor var_3306_axes_0 = const()[name = string("op_3306_axes_0"), val = tensor([-1])]; tensor blocks_14_mlp_ln_weight_to_fp16 = const()[name = string("blocks_14_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(800698048)))]; tensor blocks_14_mlp_ln_bias_to_fp16 = const()[name = string("blocks_14_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(800700672)))]; tensor var_3306_cast_fp16 = layer_norm(axes = var_3306_axes_0, beta = blocks_14_mlp_ln_bias_to_fp16, epsilon = var_3140_to_fp16, gamma = blocks_14_mlp_ln_weight_to_fp16, x = x_267_cast_fp16)[name = string("op_3306_cast_fp16")]; tensor var_3315_to_fp16 = const()[name = string("op_3315_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(800703296)))]; tensor var_3316_to_fp16 = const()[name = string("op_3316_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(813810560)))]; tensor linear_118_cast_fp16 = linear(bias = var_3316_to_fp16, weight = var_3315_to_fp16, x = var_3306_cast_fp16)[name = string("linear_118_cast_fp16")]; string x_271_mode_0 = const()[name = string("x_271_mode_0"), val = string("EXACT")]; tensor x_271_cast_fp16 = gelu(mode = x_271_mode_0, x = linear_118_cast_fp16)[name = string("x_271_cast_fp16")]; tensor var_3321_to_fp16 = const()[name = string("op_3321_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(813820864)))]; tensor var_3322_to_fp16 = const()[name = string("op_3322_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(826928128)))]; tensor linear_119_cast_fp16 = linear(bias = var_3322_to_fp16, weight = var_3321_to_fp16, x = x_271_cast_fp16)[name = string("linear_119_cast_fp16")]; tensor x_273_cast_fp16 = add(x = x_267_cast_fp16, y = linear_119_cast_fp16)[name = string("x_273_cast_fp16")]; tensor k_cache_61_begin_0 = const()[name = string("k_cache_61_begin_0"), val = tensor([15, 0, 0, 0])]; tensor k_cache_61_end_0 = const()[name = string("k_cache_61_end_0"), val = tensor([16, 1, 448, 1280])]; tensor k_cache_61_end_mask_0 = const()[name = string("k_cache_61_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_61_squeeze_mask_0 = const()[name = string("k_cache_61_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_61_cast_fp16 = slice_by_index(begin = k_cache_61_begin_0, end = k_cache_61_end_0, end_mask = k_cache_61_end_mask_0, squeeze_mask = k_cache_61_squeeze_mask_0, x = coreml_update_state_92)[name = string("k_cache_61_cast_fp16")]; tensor v_cache_61_begin_0 = const()[name = string("v_cache_61_begin_0"), val = tensor([15, 0, 0, 0])]; tensor v_cache_61_end_0 = const()[name = string("v_cache_61_end_0"), val = tensor([16, 1, 448, 1280])]; tensor v_cache_61_end_mask_0 = const()[name = string("v_cache_61_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_61_squeeze_mask_0 = const()[name = string("v_cache_61_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_61_cast_fp16 = slice_by_index(begin = v_cache_61_begin_0, end = v_cache_61_end_0, end_mask = v_cache_61_end_mask_0, squeeze_mask = v_cache_61_squeeze_mask_0, x = coreml_update_state_93)[name = string("v_cache_61_cast_fp16")]; tensor k_cache_63_begin_0 = const()[name = string("k_cache_63_begin_0"), val = tensor([15, 0, 0, 0])]; tensor k_cache_63_end_0 = const()[name = string("k_cache_63_end_0"), val = tensor([16, 1, 1500, 1280])]; tensor k_cache_63_end_mask_0 = const()[name = string("k_cache_63_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_63_squeeze_mask_0 = const()[name = string("k_cache_63_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_63_cast_fp16 = slice_by_index(begin = k_cache_63_begin_0, end = k_cache_63_end_0, end_mask = k_cache_63_end_mask_0, squeeze_mask = k_cache_63_squeeze_mask_0, x = read_state_2)[name = string("k_cache_63_cast_fp16")]; tensor v_cache_63_begin_0 = const()[name = string("v_cache_63_begin_0"), val = tensor([15, 0, 0, 0])]; tensor v_cache_63_end_0 = const()[name = string("v_cache_63_end_0"), val = tensor([16, 1, 1500, 1280])]; tensor v_cache_63_end_mask_0 = const()[name = string("v_cache_63_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_63_squeeze_mask_0 = const()[name = string("v_cache_63_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_63_cast_fp16 = slice_by_index(begin = v_cache_63_begin_0, end = v_cache_63_end_0, end_mask = v_cache_63_end_mask_0, squeeze_mask = v_cache_63_squeeze_mask_0, x = read_state_3)[name = string("v_cache_63_cast_fp16")]; int32 var_3345 = const()[name = string("op_3345"), val = int32(-1)]; tensor var_3363_axes_0 = const()[name = string("op_3363_axes_0"), val = tensor([-1])]; tensor blocks_15_attn_ln_weight_to_fp16 = const()[name = string("blocks_15_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(826930752)))]; tensor blocks_15_attn_ln_bias_to_fp16 = const()[name = string("blocks_15_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(826933376)))]; fp16 var_3351_to_fp16 = const()[name = string("op_3351_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_3363_cast_fp16 = layer_norm(axes = var_3363_axes_0, beta = blocks_15_attn_ln_bias_to_fp16, epsilon = var_3351_to_fp16, gamma = blocks_15_attn_ln_weight_to_fp16, x = x_273_cast_fp16)[name = string("op_3363_cast_fp16")]; tensor var_3374_to_fp16 = const()[name = string("op_3374_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(826936000)))]; tensor var_3375_to_fp16 = const()[name = string("op_3375_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(830212864)))]; tensor linear_120_cast_fp16 = linear(bias = var_3375_to_fp16, weight = var_3374_to_fp16, x = var_3363_cast_fp16)[name = string("linear_120_cast_fp16")]; tensor var_3378_to_fp16 = const()[name = string("op_3378_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(830215488)))]; tensor linear_121_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3378_to_fp16, x = var_3363_cast_fp16)[name = string("linear_121_cast_fp16")]; tensor var_3382_to_fp16 = const()[name = string("op_3382_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(833492352)))]; tensor var_3383_to_fp16 = const()[name = string("op_3383_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(836769216)))]; tensor linear_122_cast_fp16 = linear(bias = var_3383_to_fp16, weight = var_3382_to_fp16, x = var_3363_cast_fp16)[name = string("linear_122_cast_fp16")]; tensor var_3385_shape_cast_fp16 = shape(x = linear_120_cast_fp16)[name = string("op_3385_shape_cast_fp16")]; int32 gather_182_axis_0 = const()[name = string("gather_182_axis_0"), val = int32(0)]; int32 gather_182_batch_dims_0 = const()[name = string("gather_182_batch_dims_0"), val = int32(0)]; bool gather_182_validate_indices_0 = const()[name = string("gather_182_validate_indices_0"), val = bool(false)]; string var_3385_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3385_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_182_to_uint16 = const()[name = string("select_182_to_uint16"), val = uint16(1)]; tensor var_3385_shape_cast_fp16_to_uint16 = cast(dtype = var_3385_shape_cast_fp16_to_uint16_dtype_0, x = var_3385_shape_cast_fp16)[name = string("cast_360")]; uint16 gather_182_cast_uint16 = gather(axis = gather_182_axis_0, batch_dims = gather_182_batch_dims_0, indices = select_182_to_uint16, validate_indices = gather_182_validate_indices_0, x = var_3385_shape_cast_fp16_to_uint16)[name = string("gather_182_cast_uint16")]; string gather_182_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_182_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_182_cast_uint16_to_int32 = cast(dtype = gather_182_cast_uint16_to_int32_dtype_0, x = gather_182_cast_uint16)[name = string("cast_359")]; int32 end_step_33 = add(x = offset, y = gather_182_cast_uint16_to_int32)[name = string("end_step_33")]; tensor expand_dims_240 = const()[name = string("expand_dims_240"), val = tensor([0])]; tensor expand_dims_242 = const()[name = string("expand_dims_242"), val = tensor([0])]; tensor expand_dims_243_axes_0 = const()[name = string("expand_dims_243_axes_0"), val = tensor([0])]; tensor expand_dims_243 = expand_dims(axes = expand_dims_243_axes_0, x = end_step_33)[name = string("expand_dims_243")]; tensor concat_334_values0_0 = const()[name = string("concat_334_values0_0"), val = tensor([15])]; int32 concat_334_axis_0 = const()[name = string("concat_334_axis_0"), val = int32(0)]; bool concat_334_interleave_0 = const()[name = string("concat_334_interleave_0"), val = bool(false)]; tensor concat_334 = concat(axis = concat_334_axis_0, interleave = concat_334_interleave_0, values = (concat_334_values0_0, expand_dims_240, expand_dims_1, expand_dims_242))[name = string("concat_334")]; tensor concat_335_values0_0 = const()[name = string("concat_335_values0_0"), val = tensor([0])]; tensor concat_335_values1_0 = const()[name = string("concat_335_values1_0"), val = tensor([0])]; tensor concat_335_values3_0 = const()[name = string("concat_335_values3_0"), val = tensor([0])]; int32 concat_335_axis_0 = const()[name = string("concat_335_axis_0"), val = int32(0)]; bool concat_335_interleave_0 = const()[name = string("concat_335_interleave_0"), val = bool(false)]; tensor concat_335 = concat(axis = concat_335_axis_0, interleave = concat_335_interleave_0, values = (concat_335_values0_0, concat_335_values1_0, expand_dims_243, concat_335_values3_0))[name = string("concat_335")]; tensor k_cache1_internal_tensor_assign_16_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_16_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_16_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_334, begin_mask = k_cache1_internal_tensor_assign_16_begin_mask_0, end = concat_335, end_mask = k_cache1_internal_tensor_assign_16_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_16_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_16_stride_0, update = linear_121_cast_fp16, x = coreml_update_state_92)[name = string("k_cache1_internal_tensor_assign_16_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_16_cast_fp16, input = k_cache1)[name = string("coreml_update_state_94_write_state")]; tensor coreml_update_state_94 = read_state(input = k_cache1)[name = string("coreml_update_state_94")]; tensor v_cache1_internal_tensor_assign_16_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_16_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_16_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_334, begin_mask = v_cache1_internal_tensor_assign_16_begin_mask_0, end = concat_335, end_mask = v_cache1_internal_tensor_assign_16_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_16_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_16_stride_0, update = linear_122_cast_fp16, x = coreml_update_state_93)[name = string("v_cache1_internal_tensor_assign_16_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_16_cast_fp16, input = v_cache1)[name = string("coreml_update_state_95_write_state")]; tensor coreml_update_state_95 = read_state(input = v_cache1)[name = string("coreml_update_state_95")]; int32 concat_340_values0_0 = const()[name = string("concat_340_values0_0"), val = int32(1)]; int32 concat_340_values2_0 = const()[name = string("concat_340_values2_0"), val = int32(1280)]; int32 concat_340_axis_0 = const()[name = string("concat_340_axis_0"), val = int32(0)]; bool concat_340_interleave_0 = const()[name = string("concat_340_interleave_0"), val = bool(false)]; tensor concat_340 = concat(axis = concat_340_axis_0, interleave = concat_340_interleave_0, values = (concat_340_values0_0, end_step_33, concat_340_values2_0))[name = string("concat_340")]; tensor var_3401_begin_0 = const()[name = string("op_3401_begin_0"), val = tensor([0, 0, 0])]; tensor var_3401_end_mask_0 = const()[name = string("op_3401_end_mask_0"), val = tensor([true, false, true])]; tensor var_3401_cast_fp16 = slice_by_index(begin = var_3401_begin_0, end = concat_340, end_mask = var_3401_end_mask_0, x = k_cache_61_cast_fp16)[name = string("op_3401_cast_fp16")]; tensor var_3404_begin_0 = const()[name = string("op_3404_begin_0"), val = tensor([0, 0, 0])]; tensor var_3404_end_mask_0 = const()[name = string("op_3404_end_mask_0"), val = tensor([true, false, true])]; tensor var_3404_cast_fp16 = slice_by_index(begin = var_3404_begin_0, end = concat_340, end_mask = var_3404_end_mask_0, x = v_cache_61_cast_fp16)[name = string("op_3404_cast_fp16")]; tensor concat_342x = const()[name = string("concat_342x"), val = tensor([1, -1, 20, 64])]; tensor var_3414_cast_fp16 = reshape(shape = concat_342x, x = linear_120_cast_fp16)[name = string("op_3414_cast_fp16")]; tensor const_220_to_fp16 = const()[name = string("const_220_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_123_cast_fp16 = mul(x = var_3414_cast_fp16, y = const_220_to_fp16)[name = string("q_123_cast_fp16")]; tensor concat_343x = const()[name = string("concat_343x"), val = tensor([1, -1, 20, 64])]; tensor var_3421_cast_fp16 = reshape(shape = concat_343x, x = var_3401_cast_fp16)[name = string("op_3421_cast_fp16")]; tensor const_221_to_fp16 = const()[name = string("const_221_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_155_cast_fp16 = mul(x = var_3421_cast_fp16, y = const_221_to_fp16)[name = string("k_155_cast_fp16")]; tensor concat_344x = const()[name = string("concat_344x"), val = tensor([1, -1, 20, 64])]; tensor var_3428_cast_fp16 = reshape(shape = concat_344x, x = var_3404_cast_fp16)[name = string("op_3428_cast_fp16")]; tensor var_3429 = const()[name = string("op_3429"), val = tensor([0, 2, 1, 3])]; bool qk_91_transpose_x_0 = const()[name = string("qk_91_transpose_x_0"), val = bool(false)]; bool qk_91_transpose_y_0 = const()[name = string("qk_91_transpose_y_0"), val = bool(false)]; tensor transpose_317_perm_0 = const()[name = string("transpose_317_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_318_perm_0 = const()[name = string("transpose_318_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_318 = transpose(perm = transpose_318_perm_0, x = k_155_cast_fp16)[name = string("transpose_518")]; tensor transpose_317 = transpose(perm = transpose_317_perm_0, x = q_123_cast_fp16)[name = string("transpose_519")]; tensor qk_91_cast_fp16 = matmul(transpose_x = qk_91_transpose_x_0, transpose_y = qk_91_transpose_y_0, x = transpose_317, y = transpose_318)[name = string("qk_91_cast_fp16")]; int32 concat_345_values1_0 = const()[name = string("concat_345_values1_0"), val = int32(448)]; int32 concat_345_axis_0 = const()[name = string("concat_345_axis_0"), val = int32(0)]; bool concat_345_interleave_0 = const()[name = string("concat_345_interleave_0"), val = bool(false)]; tensor concat_345 = concat(axis = concat_345_axis_0, interleave = concat_345_interleave_0, values = (gather_182_cast_uint16_to_int32, concat_345_values1_0))[name = string("concat_345")]; tensor var_3432_begin_0 = const()[name = string("op_3432_begin_0"), val = tensor([0, 0])]; tensor var_3432_end_mask_0 = const()[name = string("op_3432_end_mask_0"), val = tensor([false, true])]; tensor var_3432_cast_fp16 = slice_by_index(begin = var_3432_begin_0, end = concat_345, end_mask = var_3432_end_mask_0, x = mask_to_fp16)[name = string("op_3432_cast_fp16")]; int32 concat_346_values0_0 = const()[name = string("concat_346_values0_0"), val = int32(0)]; int32 concat_346_axis_0 = const()[name = string("concat_346_axis_0"), val = int32(0)]; bool concat_346_interleave_0 = const()[name = string("concat_346_interleave_0"), val = bool(false)]; tensor concat_346 = concat(axis = concat_346_axis_0, interleave = concat_346_interleave_0, values = (concat_346_values0_0, gather_182_cast_uint16_to_int32))[name = string("concat_346")]; tensor var_3433_begin_0 = const()[name = string("op_3433_begin_0"), val = tensor([0, 0])]; tensor var_3433_end_mask_0 = const()[name = string("op_3433_end_mask_0"), val = tensor([true, false])]; tensor var_3433_cast_fp16 = slice_by_index(begin = var_3433_begin_0, end = concat_346, end_mask = var_3433_end_mask_0, x = var_3432_cast_fp16)[name = string("op_3433_cast_fp16")]; tensor qk_93_cast_fp16 = add(x = qk_91_cast_fp16, y = var_3433_cast_fp16)[name = string("qk_93_cast_fp16")]; tensor var_3436_cast_fp16 = softmax(axis = var_3345, x = qk_93_cast_fp16)[name = string("op_3436_cast_fp16")]; bool var_3438_transpose_x_0 = const()[name = string("op_3438_transpose_x_0"), val = bool(false)]; bool var_3438_transpose_y_0 = const()[name = string("op_3438_transpose_y_0"), val = bool(false)]; tensor v_155_cast_fp16 = transpose(perm = var_3429, x = var_3428_cast_fp16)[name = string("transpose_520")]; tensor var_3438_cast_fp16 = matmul(transpose_x = var_3438_transpose_x_0, transpose_y = var_3438_transpose_y_0, x = var_3436_cast_fp16, y = v_155_cast_fp16)[name = string("op_3438_cast_fp16")]; tensor var_3439 = const()[name = string("op_3439"), val = tensor([0, 2, 1, 3])]; tensor concat_347x = const()[name = string("concat_347x"), val = tensor([1, -1, 1280])]; tensor var_3440_cast_fp16 = transpose(perm = var_3439, x = var_3438_cast_fp16)[name = string("transpose_517")]; tensor x_277_cast_fp16 = reshape(shape = concat_347x, x = var_3440_cast_fp16)[name = string("x_277_cast_fp16")]; tensor var_3444_to_fp16 = const()[name = string("op_3444_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(836771840)))]; tensor var_3445_to_fp16 = const()[name = string("op_3445_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(840048704)))]; tensor linear_123_cast_fp16 = linear(bias = var_3445_to_fp16, weight = var_3444_to_fp16, x = x_277_cast_fp16)[name = string("linear_123_cast_fp16")]; tensor x_279_cast_fp16 = add(x = x_273_cast_fp16, y = linear_123_cast_fp16)[name = string("x_279_cast_fp16")]; tensor var_3452_axes_0 = const()[name = string("op_3452_axes_0"), val = tensor([-1])]; tensor blocks_15_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_15_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(840051328)))]; tensor blocks_15_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_15_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(840053952)))]; tensor var_3452_cast_fp16 = layer_norm(axes = var_3452_axes_0, beta = blocks_15_cross_attn_ln_bias_to_fp16, epsilon = var_3351_to_fp16, gamma = blocks_15_cross_attn_ln_weight_to_fp16, x = x_279_cast_fp16)[name = string("op_3452_cast_fp16")]; tensor var_3461_to_fp16 = const()[name = string("op_3461_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(840056576)))]; tensor var_3462_to_fp16 = const()[name = string("op_3462_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(843333440)))]; tensor linear_124_cast_fp16 = linear(bias = var_3462_to_fp16, weight = var_3461_to_fp16, x = var_3452_cast_fp16)[name = string("linear_124_cast_fp16")]; tensor concat_348 = const()[name = string("concat_348"), val = tensor([0, 0, 0])]; tensor concat_349 = const()[name = string("concat_349"), val = tensor([0, 1500, 0])]; tensor k_157_internal_tensor_assign_1_stride_0 = const()[name = string("k_157_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_157_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_157_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_157_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_157_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_157_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_157_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_157_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_348, begin_mask = k_157_internal_tensor_assign_1_begin_mask_0, end = concat_349, end_mask = k_157_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_157_internal_tensor_assign_1_squeeze_mask_0, stride = k_157_internal_tensor_assign_1_stride_0, update = k_cache_63_cast_fp16, x = k_7_to_fp16)[name = string("k_157_internal_tensor_assign_1_cast_fp16")]; tensor concat_350 = const()[name = string("concat_350"), val = tensor([0, 0, 0])]; tensor concat_351 = const()[name = string("concat_351"), val = tensor([0, 1500, 0])]; tensor v_157_internal_tensor_assign_1_stride_0 = const()[name = string("v_157_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_157_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_157_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_157_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_157_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_157_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_157_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_157_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_350, begin_mask = v_157_internal_tensor_assign_1_begin_mask_0, end = concat_351, end_mask = v_157_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_157_internal_tensor_assign_1_squeeze_mask_0, stride = v_157_internal_tensor_assign_1_stride_0, update = v_cache_63_cast_fp16, x = k_7_to_fp16)[name = string("v_157_internal_tensor_assign_1_cast_fp16")]; tensor concat_352x = const()[name = string("concat_352x"), val = tensor([1, -1, 20, 64])]; tensor var_3482_cast_fp16 = reshape(shape = concat_352x, x = linear_124_cast_fp16)[name = string("op_3482_cast_fp16")]; tensor const_222_to_fp16 = const()[name = string("const_222_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_127_cast_fp16 = mul(x = var_3482_cast_fp16, y = const_222_to_fp16)[name = string("q_127_cast_fp16")]; tensor var_3488 = const()[name = string("op_3488"), val = tensor([1, 1500, 20, -1])]; tensor var_3489_cast_fp16 = reshape(shape = var_3488, x = k_157_internal_tensor_assign_1_cast_fp16)[name = string("op_3489_cast_fp16")]; tensor const_223_to_fp16 = const()[name = string("const_223_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_159_cast_fp16 = mul(x = var_3489_cast_fp16, y = const_223_to_fp16)[name = string("k_159_cast_fp16")]; tensor var_3495 = const()[name = string("op_3495"), val = tensor([1, 1500, 20, -1])]; tensor var_3496_cast_fp16 = reshape(shape = var_3495, x = v_157_internal_tensor_assign_1_cast_fp16)[name = string("op_3496_cast_fp16")]; tensor var_3497 = const()[name = string("op_3497"), val = tensor([0, 2, 1, 3])]; bool qk_95_transpose_x_0 = const()[name = string("qk_95_transpose_x_0"), val = bool(false)]; bool qk_95_transpose_y_0 = const()[name = string("qk_95_transpose_y_0"), val = bool(false)]; tensor transpose_319_perm_0 = const()[name = string("transpose_319_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_320_perm_0 = const()[name = string("transpose_320_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_320 = transpose(perm = transpose_320_perm_0, x = k_159_cast_fp16)[name = string("transpose_514")]; tensor transpose_319 = transpose(perm = transpose_319_perm_0, x = q_127_cast_fp16)[name = string("transpose_515")]; tensor qk_95_cast_fp16 = matmul(transpose_x = qk_95_transpose_x_0, transpose_y = qk_95_transpose_y_0, x = transpose_319, y = transpose_320)[name = string("qk_95_cast_fp16")]; tensor var_3501_cast_fp16 = softmax(axis = var_3345, x = qk_95_cast_fp16)[name = string("op_3501_cast_fp16")]; bool var_3503_transpose_x_0 = const()[name = string("op_3503_transpose_x_0"), val = bool(false)]; bool var_3503_transpose_y_0 = const()[name = string("op_3503_transpose_y_0"), val = bool(false)]; tensor v_159_cast_fp16 = transpose(perm = var_3497, x = var_3496_cast_fp16)[name = string("transpose_516")]; tensor var_3503_cast_fp16 = matmul(transpose_x = var_3503_transpose_x_0, transpose_y = var_3503_transpose_y_0, x = var_3501_cast_fp16, y = v_159_cast_fp16)[name = string("op_3503_cast_fp16")]; tensor var_3504 = const()[name = string("op_3504"), val = tensor([0, 2, 1, 3])]; tensor concat_353x = const()[name = string("concat_353x"), val = tensor([1, -1, 1280])]; tensor var_3505_cast_fp16 = transpose(perm = var_3504, x = var_3503_cast_fp16)[name = string("transpose_513")]; tensor x_283_cast_fp16 = reshape(shape = concat_353x, x = var_3505_cast_fp16)[name = string("x_283_cast_fp16")]; tensor var_3509_to_fp16 = const()[name = string("op_3509_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(843336064)))]; tensor var_3510_to_fp16 = const()[name = string("op_3510_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(846612928)))]; tensor linear_125_cast_fp16 = linear(bias = var_3510_to_fp16, weight = var_3509_to_fp16, x = x_283_cast_fp16)[name = string("linear_125_cast_fp16")]; tensor x_285_cast_fp16 = add(x = x_279_cast_fp16, y = linear_125_cast_fp16)[name = string("x_285_cast_fp16")]; tensor var_3517_axes_0 = const()[name = string("op_3517_axes_0"), val = tensor([-1])]; tensor blocks_15_mlp_ln_weight_to_fp16 = const()[name = string("blocks_15_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(846615552)))]; tensor blocks_15_mlp_ln_bias_to_fp16 = const()[name = string("blocks_15_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(846618176)))]; tensor var_3517_cast_fp16 = layer_norm(axes = var_3517_axes_0, beta = blocks_15_mlp_ln_bias_to_fp16, epsilon = var_3351_to_fp16, gamma = blocks_15_mlp_ln_weight_to_fp16, x = x_285_cast_fp16)[name = string("op_3517_cast_fp16")]; tensor var_3526_to_fp16 = const()[name = string("op_3526_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(846620800)))]; tensor var_3527_to_fp16 = const()[name = string("op_3527_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(859728064)))]; tensor linear_126_cast_fp16 = linear(bias = var_3527_to_fp16, weight = var_3526_to_fp16, x = var_3517_cast_fp16)[name = string("linear_126_cast_fp16")]; string x_289_mode_0 = const()[name = string("x_289_mode_0"), val = string("EXACT")]; tensor x_289_cast_fp16 = gelu(mode = x_289_mode_0, x = linear_126_cast_fp16)[name = string("x_289_cast_fp16")]; tensor var_3532_to_fp16 = const()[name = string("op_3532_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(859738368)))]; tensor var_3533_to_fp16 = const()[name = string("op_3533_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872845632)))]; tensor linear_127_cast_fp16 = linear(bias = var_3533_to_fp16, weight = var_3532_to_fp16, x = x_289_cast_fp16)[name = string("linear_127_cast_fp16")]; tensor x_291_cast_fp16 = add(x = x_285_cast_fp16, y = linear_127_cast_fp16)[name = string("x_291_cast_fp16")]; tensor k_cache_65_begin_0 = const()[name = string("k_cache_65_begin_0"), val = tensor([16, 0, 0, 0])]; tensor k_cache_65_end_0 = const()[name = string("k_cache_65_end_0"), val = tensor([17, 1, 448, 1280])]; tensor k_cache_65_end_mask_0 = const()[name = string("k_cache_65_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_65_squeeze_mask_0 = const()[name = string("k_cache_65_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_65_cast_fp16 = slice_by_index(begin = k_cache_65_begin_0, end = k_cache_65_end_0, end_mask = k_cache_65_end_mask_0, squeeze_mask = k_cache_65_squeeze_mask_0, x = coreml_update_state_94)[name = string("k_cache_65_cast_fp16")]; tensor v_cache_65_begin_0 = const()[name = string("v_cache_65_begin_0"), val = tensor([16, 0, 0, 0])]; tensor v_cache_65_end_0 = const()[name = string("v_cache_65_end_0"), val = tensor([17, 1, 448, 1280])]; tensor v_cache_65_end_mask_0 = const()[name = string("v_cache_65_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_65_squeeze_mask_0 = const()[name = string("v_cache_65_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_65_cast_fp16 = slice_by_index(begin = v_cache_65_begin_0, end = v_cache_65_end_0, end_mask = v_cache_65_end_mask_0, squeeze_mask = v_cache_65_squeeze_mask_0, x = coreml_update_state_95)[name = string("v_cache_65_cast_fp16")]; tensor k_cache_67_begin_0 = const()[name = string("k_cache_67_begin_0"), val = tensor([16, 0, 0, 0])]; tensor k_cache_67_end_0 = const()[name = string("k_cache_67_end_0"), val = tensor([17, 1, 1500, 1280])]; tensor k_cache_67_end_mask_0 = const()[name = string("k_cache_67_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_67_squeeze_mask_0 = const()[name = string("k_cache_67_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_67_cast_fp16 = slice_by_index(begin = k_cache_67_begin_0, end = k_cache_67_end_0, end_mask = k_cache_67_end_mask_0, squeeze_mask = k_cache_67_squeeze_mask_0, x = read_state_2)[name = string("k_cache_67_cast_fp16")]; tensor v_cache_67_begin_0 = const()[name = string("v_cache_67_begin_0"), val = tensor([16, 0, 0, 0])]; tensor v_cache_67_end_0 = const()[name = string("v_cache_67_end_0"), val = tensor([17, 1, 1500, 1280])]; tensor v_cache_67_end_mask_0 = const()[name = string("v_cache_67_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_67_squeeze_mask_0 = const()[name = string("v_cache_67_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_67_cast_fp16 = slice_by_index(begin = v_cache_67_begin_0, end = v_cache_67_end_0, end_mask = v_cache_67_end_mask_0, squeeze_mask = v_cache_67_squeeze_mask_0, x = read_state_3)[name = string("v_cache_67_cast_fp16")]; int32 var_3556 = const()[name = string("op_3556"), val = int32(-1)]; tensor var_3574_axes_0 = const()[name = string("op_3574_axes_0"), val = tensor([-1])]; tensor blocks_16_attn_ln_weight_to_fp16 = const()[name = string("blocks_16_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872848256)))]; tensor blocks_16_attn_ln_bias_to_fp16 = const()[name = string("blocks_16_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872850880)))]; fp16 var_3562_to_fp16 = const()[name = string("op_3562_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_3574_cast_fp16 = layer_norm(axes = var_3574_axes_0, beta = blocks_16_attn_ln_bias_to_fp16, epsilon = var_3562_to_fp16, gamma = blocks_16_attn_ln_weight_to_fp16, x = x_291_cast_fp16)[name = string("op_3574_cast_fp16")]; tensor var_3585_to_fp16 = const()[name = string("op_3585_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872853504)))]; tensor var_3586_to_fp16 = const()[name = string("op_3586_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(876130368)))]; tensor linear_128_cast_fp16 = linear(bias = var_3586_to_fp16, weight = var_3585_to_fp16, x = var_3574_cast_fp16)[name = string("linear_128_cast_fp16")]; tensor var_3589_to_fp16 = const()[name = string("op_3589_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(876132992)))]; tensor linear_129_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3589_to_fp16, x = var_3574_cast_fp16)[name = string("linear_129_cast_fp16")]; tensor var_3593_to_fp16 = const()[name = string("op_3593_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(879409856)))]; tensor var_3594_to_fp16 = const()[name = string("op_3594_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(882686720)))]; tensor linear_130_cast_fp16 = linear(bias = var_3594_to_fp16, weight = var_3593_to_fp16, x = var_3574_cast_fp16)[name = string("linear_130_cast_fp16")]; tensor var_3596_shape_cast_fp16 = shape(x = linear_128_cast_fp16)[name = string("op_3596_shape_cast_fp16")]; int32 gather_194_axis_0 = const()[name = string("gather_194_axis_0"), val = int32(0)]; int32 gather_194_batch_dims_0 = const()[name = string("gather_194_batch_dims_0"), val = int32(0)]; bool gather_194_validate_indices_0 = const()[name = string("gather_194_validate_indices_0"), val = bool(false)]; string var_3596_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3596_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_194_to_uint16 = const()[name = string("select_194_to_uint16"), val = uint16(1)]; tensor var_3596_shape_cast_fp16_to_uint16 = cast(dtype = var_3596_shape_cast_fp16_to_uint16_dtype_0, x = var_3596_shape_cast_fp16)[name = string("cast_358")]; uint16 gather_194_cast_uint16 = gather(axis = gather_194_axis_0, batch_dims = gather_194_batch_dims_0, indices = select_194_to_uint16, validate_indices = gather_194_validate_indices_0, x = var_3596_shape_cast_fp16_to_uint16)[name = string("gather_194_cast_uint16")]; string gather_194_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_194_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_194_cast_uint16_to_int32 = cast(dtype = gather_194_cast_uint16_to_int32_dtype_0, x = gather_194_cast_uint16)[name = string("cast_357")]; int32 end_step_35 = add(x = offset, y = gather_194_cast_uint16_to_int32)[name = string("end_step_35")]; tensor expand_dims_256 = const()[name = string("expand_dims_256"), val = tensor([0])]; tensor expand_dims_258 = const()[name = string("expand_dims_258"), val = tensor([0])]; tensor expand_dims_259_axes_0 = const()[name = string("expand_dims_259_axes_0"), val = tensor([0])]; tensor expand_dims_259 = expand_dims(axes = expand_dims_259_axes_0, x = end_step_35)[name = string("expand_dims_259")]; tensor concat_356_values0_0 = const()[name = string("concat_356_values0_0"), val = tensor([16])]; int32 concat_356_axis_0 = const()[name = string("concat_356_axis_0"), val = int32(0)]; bool concat_356_interleave_0 = const()[name = string("concat_356_interleave_0"), val = bool(false)]; tensor concat_356 = concat(axis = concat_356_axis_0, interleave = concat_356_interleave_0, values = (concat_356_values0_0, expand_dims_256, expand_dims_1, expand_dims_258))[name = string("concat_356")]; tensor concat_357_values0_0 = const()[name = string("concat_357_values0_0"), val = tensor([0])]; tensor concat_357_values1_0 = const()[name = string("concat_357_values1_0"), val = tensor([0])]; tensor concat_357_values3_0 = const()[name = string("concat_357_values3_0"), val = tensor([0])]; int32 concat_357_axis_0 = const()[name = string("concat_357_axis_0"), val = int32(0)]; bool concat_357_interleave_0 = const()[name = string("concat_357_interleave_0"), val = bool(false)]; tensor concat_357 = concat(axis = concat_357_axis_0, interleave = concat_357_interleave_0, values = (concat_357_values0_0, concat_357_values1_0, expand_dims_259, concat_357_values3_0))[name = string("concat_357")]; tensor k_cache1_internal_tensor_assign_17_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_17_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_17_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_356, begin_mask = k_cache1_internal_tensor_assign_17_begin_mask_0, end = concat_357, end_mask = k_cache1_internal_tensor_assign_17_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_17_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_17_stride_0, update = linear_129_cast_fp16, x = coreml_update_state_94)[name = string("k_cache1_internal_tensor_assign_17_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_17_cast_fp16, input = k_cache1)[name = string("coreml_update_state_96_write_state")]; tensor coreml_update_state_96 = read_state(input = k_cache1)[name = string("coreml_update_state_96")]; tensor v_cache1_internal_tensor_assign_17_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_17_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_17_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_356, begin_mask = v_cache1_internal_tensor_assign_17_begin_mask_0, end = concat_357, end_mask = v_cache1_internal_tensor_assign_17_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_17_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_17_stride_0, update = linear_130_cast_fp16, x = coreml_update_state_95)[name = string("v_cache1_internal_tensor_assign_17_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_17_cast_fp16, input = v_cache1)[name = string("coreml_update_state_97_write_state")]; tensor coreml_update_state_97 = read_state(input = v_cache1)[name = string("coreml_update_state_97")]; int32 concat_362_values0_0 = const()[name = string("concat_362_values0_0"), val = int32(1)]; int32 concat_362_values2_0 = const()[name = string("concat_362_values2_0"), val = int32(1280)]; int32 concat_362_axis_0 = const()[name = string("concat_362_axis_0"), val = int32(0)]; bool concat_362_interleave_0 = const()[name = string("concat_362_interleave_0"), val = bool(false)]; tensor concat_362 = concat(axis = concat_362_axis_0, interleave = concat_362_interleave_0, values = (concat_362_values0_0, end_step_35, concat_362_values2_0))[name = string("concat_362")]; tensor var_3612_begin_0 = const()[name = string("op_3612_begin_0"), val = tensor([0, 0, 0])]; tensor var_3612_end_mask_0 = const()[name = string("op_3612_end_mask_0"), val = tensor([true, false, true])]; tensor var_3612_cast_fp16 = slice_by_index(begin = var_3612_begin_0, end = concat_362, end_mask = var_3612_end_mask_0, x = k_cache_65_cast_fp16)[name = string("op_3612_cast_fp16")]; tensor var_3615_begin_0 = const()[name = string("op_3615_begin_0"), val = tensor([0, 0, 0])]; tensor var_3615_end_mask_0 = const()[name = string("op_3615_end_mask_0"), val = tensor([true, false, true])]; tensor var_3615_cast_fp16 = slice_by_index(begin = var_3615_begin_0, end = concat_362, end_mask = var_3615_end_mask_0, x = v_cache_65_cast_fp16)[name = string("op_3615_cast_fp16")]; tensor concat_364x = const()[name = string("concat_364x"), val = tensor([1, -1, 20, 64])]; tensor var_3625_cast_fp16 = reshape(shape = concat_364x, x = linear_128_cast_fp16)[name = string("op_3625_cast_fp16")]; tensor const_224_to_fp16 = const()[name = string("const_224_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_131_cast_fp16 = mul(x = var_3625_cast_fp16, y = const_224_to_fp16)[name = string("q_131_cast_fp16")]; tensor concat_365x = const()[name = string("concat_365x"), val = tensor([1, -1, 20, 64])]; tensor var_3632_cast_fp16 = reshape(shape = concat_365x, x = var_3612_cast_fp16)[name = string("op_3632_cast_fp16")]; tensor const_225_to_fp16 = const()[name = string("const_225_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_165_cast_fp16 = mul(x = var_3632_cast_fp16, y = const_225_to_fp16)[name = string("k_165_cast_fp16")]; tensor concat_366x = const()[name = string("concat_366x"), val = tensor([1, -1, 20, 64])]; tensor var_3639_cast_fp16 = reshape(shape = concat_366x, x = var_3615_cast_fp16)[name = string("op_3639_cast_fp16")]; tensor var_3640 = const()[name = string("op_3640"), val = tensor([0, 2, 1, 3])]; bool qk_97_transpose_x_0 = const()[name = string("qk_97_transpose_x_0"), val = bool(false)]; bool qk_97_transpose_y_0 = const()[name = string("qk_97_transpose_y_0"), val = bool(false)]; tensor transpose_321_perm_0 = const()[name = string("transpose_321_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_322_perm_0 = const()[name = string("transpose_322_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_322 = transpose(perm = transpose_322_perm_0, x = k_165_cast_fp16)[name = string("transpose_510")]; tensor transpose_321 = transpose(perm = transpose_321_perm_0, x = q_131_cast_fp16)[name = string("transpose_511")]; tensor qk_97_cast_fp16 = matmul(transpose_x = qk_97_transpose_x_0, transpose_y = qk_97_transpose_y_0, x = transpose_321, y = transpose_322)[name = string("qk_97_cast_fp16")]; int32 concat_367_values1_0 = const()[name = string("concat_367_values1_0"), val = int32(448)]; int32 concat_367_axis_0 = const()[name = string("concat_367_axis_0"), val = int32(0)]; bool concat_367_interleave_0 = const()[name = string("concat_367_interleave_0"), val = bool(false)]; tensor concat_367 = concat(axis = concat_367_axis_0, interleave = concat_367_interleave_0, values = (gather_194_cast_uint16_to_int32, concat_367_values1_0))[name = string("concat_367")]; tensor var_3643_begin_0 = const()[name = string("op_3643_begin_0"), val = tensor([0, 0])]; tensor var_3643_end_mask_0 = const()[name = string("op_3643_end_mask_0"), val = tensor([false, true])]; tensor var_3643_cast_fp16 = slice_by_index(begin = var_3643_begin_0, end = concat_367, end_mask = var_3643_end_mask_0, x = mask_to_fp16)[name = string("op_3643_cast_fp16")]; int32 concat_368_values0_0 = const()[name = string("concat_368_values0_0"), val = int32(0)]; int32 concat_368_axis_0 = const()[name = string("concat_368_axis_0"), val = int32(0)]; bool concat_368_interleave_0 = const()[name = string("concat_368_interleave_0"), val = bool(false)]; tensor concat_368 = concat(axis = concat_368_axis_0, interleave = concat_368_interleave_0, values = (concat_368_values0_0, gather_194_cast_uint16_to_int32))[name = string("concat_368")]; tensor var_3644_begin_0 = const()[name = string("op_3644_begin_0"), val = tensor([0, 0])]; tensor var_3644_end_mask_0 = const()[name = string("op_3644_end_mask_0"), val = tensor([true, false])]; tensor var_3644_cast_fp16 = slice_by_index(begin = var_3644_begin_0, end = concat_368, end_mask = var_3644_end_mask_0, x = var_3643_cast_fp16)[name = string("op_3644_cast_fp16")]; tensor qk_99_cast_fp16 = add(x = qk_97_cast_fp16, y = var_3644_cast_fp16)[name = string("qk_99_cast_fp16")]; tensor var_3647_cast_fp16 = softmax(axis = var_3556, x = qk_99_cast_fp16)[name = string("op_3647_cast_fp16")]; bool var_3649_transpose_x_0 = const()[name = string("op_3649_transpose_x_0"), val = bool(false)]; bool var_3649_transpose_y_0 = const()[name = string("op_3649_transpose_y_0"), val = bool(false)]; tensor v_165_cast_fp16 = transpose(perm = var_3640, x = var_3639_cast_fp16)[name = string("transpose_512")]; tensor var_3649_cast_fp16 = matmul(transpose_x = var_3649_transpose_x_0, transpose_y = var_3649_transpose_y_0, x = var_3647_cast_fp16, y = v_165_cast_fp16)[name = string("op_3649_cast_fp16")]; tensor var_3650 = const()[name = string("op_3650"), val = tensor([0, 2, 1, 3])]; tensor concat_369x = const()[name = string("concat_369x"), val = tensor([1, -1, 1280])]; tensor var_3651_cast_fp16 = transpose(perm = var_3650, x = var_3649_cast_fp16)[name = string("transpose_509")]; tensor x_295_cast_fp16 = reshape(shape = concat_369x, x = var_3651_cast_fp16)[name = string("x_295_cast_fp16")]; tensor var_3655_to_fp16 = const()[name = string("op_3655_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(882689344)))]; tensor var_3656_to_fp16 = const()[name = string("op_3656_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(885966208)))]; tensor linear_131_cast_fp16 = linear(bias = var_3656_to_fp16, weight = var_3655_to_fp16, x = x_295_cast_fp16)[name = string("linear_131_cast_fp16")]; tensor x_297_cast_fp16 = add(x = x_291_cast_fp16, y = linear_131_cast_fp16)[name = string("x_297_cast_fp16")]; tensor var_3663_axes_0 = const()[name = string("op_3663_axes_0"), val = tensor([-1])]; tensor blocks_16_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_16_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(885968832)))]; tensor blocks_16_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_16_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(885971456)))]; tensor var_3663_cast_fp16 = layer_norm(axes = var_3663_axes_0, beta = blocks_16_cross_attn_ln_bias_to_fp16, epsilon = var_3562_to_fp16, gamma = blocks_16_cross_attn_ln_weight_to_fp16, x = x_297_cast_fp16)[name = string("op_3663_cast_fp16")]; tensor var_3672_to_fp16 = const()[name = string("op_3672_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(885974080)))]; tensor var_3673_to_fp16 = const()[name = string("op_3673_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(889250944)))]; tensor linear_132_cast_fp16 = linear(bias = var_3673_to_fp16, weight = var_3672_to_fp16, x = var_3663_cast_fp16)[name = string("linear_132_cast_fp16")]; tensor concat_370 = const()[name = string("concat_370"), val = tensor([0, 0, 0])]; tensor concat_371 = const()[name = string("concat_371"), val = tensor([0, 1500, 0])]; tensor k_167_internal_tensor_assign_1_stride_0 = const()[name = string("k_167_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_167_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_167_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_167_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_167_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_167_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_167_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_167_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_370, begin_mask = k_167_internal_tensor_assign_1_begin_mask_0, end = concat_371, end_mask = k_167_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_167_internal_tensor_assign_1_squeeze_mask_0, stride = k_167_internal_tensor_assign_1_stride_0, update = k_cache_67_cast_fp16, x = k_7_to_fp16)[name = string("k_167_internal_tensor_assign_1_cast_fp16")]; tensor concat_372 = const()[name = string("concat_372"), val = tensor([0, 0, 0])]; tensor concat_373 = const()[name = string("concat_373"), val = tensor([0, 1500, 0])]; tensor v_167_internal_tensor_assign_1_stride_0 = const()[name = string("v_167_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_167_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_167_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_167_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_167_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_167_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_167_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_167_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_372, begin_mask = v_167_internal_tensor_assign_1_begin_mask_0, end = concat_373, end_mask = v_167_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_167_internal_tensor_assign_1_squeeze_mask_0, stride = v_167_internal_tensor_assign_1_stride_0, update = v_cache_67_cast_fp16, x = k_7_to_fp16)[name = string("v_167_internal_tensor_assign_1_cast_fp16")]; tensor concat_374x = const()[name = string("concat_374x"), val = tensor([1, -1, 20, 64])]; tensor var_3693_cast_fp16 = reshape(shape = concat_374x, x = linear_132_cast_fp16)[name = string("op_3693_cast_fp16")]; tensor const_226_to_fp16 = const()[name = string("const_226_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_135_cast_fp16 = mul(x = var_3693_cast_fp16, y = const_226_to_fp16)[name = string("q_135_cast_fp16")]; tensor var_3699 = const()[name = string("op_3699"), val = tensor([1, 1500, 20, -1])]; tensor var_3700_cast_fp16 = reshape(shape = var_3699, x = k_167_internal_tensor_assign_1_cast_fp16)[name = string("op_3700_cast_fp16")]; tensor const_227_to_fp16 = const()[name = string("const_227_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_169_cast_fp16 = mul(x = var_3700_cast_fp16, y = const_227_to_fp16)[name = string("k_169_cast_fp16")]; tensor var_3706 = const()[name = string("op_3706"), val = tensor([1, 1500, 20, -1])]; tensor var_3707_cast_fp16 = reshape(shape = var_3706, x = v_167_internal_tensor_assign_1_cast_fp16)[name = string("op_3707_cast_fp16")]; tensor var_3708 = const()[name = string("op_3708"), val = tensor([0, 2, 1, 3])]; bool qk_101_transpose_x_0 = const()[name = string("qk_101_transpose_x_0"), val = bool(false)]; bool qk_101_transpose_y_0 = const()[name = string("qk_101_transpose_y_0"), val = bool(false)]; tensor transpose_323_perm_0 = const()[name = string("transpose_323_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_324_perm_0 = const()[name = string("transpose_324_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_324 = transpose(perm = transpose_324_perm_0, x = k_169_cast_fp16)[name = string("transpose_506")]; tensor transpose_323 = transpose(perm = transpose_323_perm_0, x = q_135_cast_fp16)[name = string("transpose_507")]; tensor qk_101_cast_fp16 = matmul(transpose_x = qk_101_transpose_x_0, transpose_y = qk_101_transpose_y_0, x = transpose_323, y = transpose_324)[name = string("qk_101_cast_fp16")]; tensor var_3712_cast_fp16 = softmax(axis = var_3556, x = qk_101_cast_fp16)[name = string("op_3712_cast_fp16")]; bool var_3714_transpose_x_0 = const()[name = string("op_3714_transpose_x_0"), val = bool(false)]; bool var_3714_transpose_y_0 = const()[name = string("op_3714_transpose_y_0"), val = bool(false)]; tensor v_169_cast_fp16 = transpose(perm = var_3708, x = var_3707_cast_fp16)[name = string("transpose_508")]; tensor var_3714_cast_fp16 = matmul(transpose_x = var_3714_transpose_x_0, transpose_y = var_3714_transpose_y_0, x = var_3712_cast_fp16, y = v_169_cast_fp16)[name = string("op_3714_cast_fp16")]; tensor var_3715 = const()[name = string("op_3715"), val = tensor([0, 2, 1, 3])]; tensor concat_375x = const()[name = string("concat_375x"), val = tensor([1, -1, 1280])]; tensor var_3716_cast_fp16 = transpose(perm = var_3715, x = var_3714_cast_fp16)[name = string("transpose_505")]; tensor x_301_cast_fp16 = reshape(shape = concat_375x, x = var_3716_cast_fp16)[name = string("x_301_cast_fp16")]; tensor var_3720_to_fp16 = const()[name = string("op_3720_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(889253568)))]; tensor var_3721_to_fp16 = const()[name = string("op_3721_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(892530432)))]; tensor linear_133_cast_fp16 = linear(bias = var_3721_to_fp16, weight = var_3720_to_fp16, x = x_301_cast_fp16)[name = string("linear_133_cast_fp16")]; tensor x_303_cast_fp16 = add(x = x_297_cast_fp16, y = linear_133_cast_fp16)[name = string("x_303_cast_fp16")]; tensor var_3728_axes_0 = const()[name = string("op_3728_axes_0"), val = tensor([-1])]; tensor blocks_16_mlp_ln_weight_to_fp16 = const()[name = string("blocks_16_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(892533056)))]; tensor blocks_16_mlp_ln_bias_to_fp16 = const()[name = string("blocks_16_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(892535680)))]; tensor var_3728_cast_fp16 = layer_norm(axes = var_3728_axes_0, beta = blocks_16_mlp_ln_bias_to_fp16, epsilon = var_3562_to_fp16, gamma = blocks_16_mlp_ln_weight_to_fp16, x = x_303_cast_fp16)[name = string("op_3728_cast_fp16")]; tensor var_3737_to_fp16 = const()[name = string("op_3737_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(892538304)))]; tensor var_3738_to_fp16 = const()[name = string("op_3738_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(905645568)))]; tensor linear_134_cast_fp16 = linear(bias = var_3738_to_fp16, weight = var_3737_to_fp16, x = var_3728_cast_fp16)[name = string("linear_134_cast_fp16")]; string x_307_mode_0 = const()[name = string("x_307_mode_0"), val = string("EXACT")]; tensor x_307_cast_fp16 = gelu(mode = x_307_mode_0, x = linear_134_cast_fp16)[name = string("x_307_cast_fp16")]; tensor var_3743_to_fp16 = const()[name = string("op_3743_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(905655872)))]; tensor var_3744_to_fp16 = const()[name = string("op_3744_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(918763136)))]; tensor linear_135_cast_fp16 = linear(bias = var_3744_to_fp16, weight = var_3743_to_fp16, x = x_307_cast_fp16)[name = string("linear_135_cast_fp16")]; tensor x_309_cast_fp16 = add(x = x_303_cast_fp16, y = linear_135_cast_fp16)[name = string("x_309_cast_fp16")]; tensor k_cache_69_begin_0 = const()[name = string("k_cache_69_begin_0"), val = tensor([17, 0, 0, 0])]; tensor k_cache_69_end_0 = const()[name = string("k_cache_69_end_0"), val = tensor([18, 1, 448, 1280])]; tensor k_cache_69_end_mask_0 = const()[name = string("k_cache_69_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_69_squeeze_mask_0 = const()[name = string("k_cache_69_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_69_cast_fp16 = slice_by_index(begin = k_cache_69_begin_0, end = k_cache_69_end_0, end_mask = k_cache_69_end_mask_0, squeeze_mask = k_cache_69_squeeze_mask_0, x = coreml_update_state_96)[name = string("k_cache_69_cast_fp16")]; tensor v_cache_69_begin_0 = const()[name = string("v_cache_69_begin_0"), val = tensor([17, 0, 0, 0])]; tensor v_cache_69_end_0 = const()[name = string("v_cache_69_end_0"), val = tensor([18, 1, 448, 1280])]; tensor v_cache_69_end_mask_0 = const()[name = string("v_cache_69_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_69_squeeze_mask_0 = const()[name = string("v_cache_69_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_69_cast_fp16 = slice_by_index(begin = v_cache_69_begin_0, end = v_cache_69_end_0, end_mask = v_cache_69_end_mask_0, squeeze_mask = v_cache_69_squeeze_mask_0, x = coreml_update_state_97)[name = string("v_cache_69_cast_fp16")]; tensor k_cache_71_begin_0 = const()[name = string("k_cache_71_begin_0"), val = tensor([17, 0, 0, 0])]; tensor k_cache_71_end_0 = const()[name = string("k_cache_71_end_0"), val = tensor([18, 1, 1500, 1280])]; tensor k_cache_71_end_mask_0 = const()[name = string("k_cache_71_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_71_squeeze_mask_0 = const()[name = string("k_cache_71_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_71_cast_fp16 = slice_by_index(begin = k_cache_71_begin_0, end = k_cache_71_end_0, end_mask = k_cache_71_end_mask_0, squeeze_mask = k_cache_71_squeeze_mask_0, x = read_state_2)[name = string("k_cache_71_cast_fp16")]; tensor v_cache_71_begin_0 = const()[name = string("v_cache_71_begin_0"), val = tensor([17, 0, 0, 0])]; tensor v_cache_71_end_0 = const()[name = string("v_cache_71_end_0"), val = tensor([18, 1, 1500, 1280])]; tensor v_cache_71_end_mask_0 = const()[name = string("v_cache_71_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_71_squeeze_mask_0 = const()[name = string("v_cache_71_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_71_cast_fp16 = slice_by_index(begin = v_cache_71_begin_0, end = v_cache_71_end_0, end_mask = v_cache_71_end_mask_0, squeeze_mask = v_cache_71_squeeze_mask_0, x = read_state_3)[name = string("v_cache_71_cast_fp16")]; int32 var_3767 = const()[name = string("op_3767"), val = int32(-1)]; tensor var_3785_axes_0 = const()[name = string("op_3785_axes_0"), val = tensor([-1])]; tensor blocks_17_attn_ln_weight_to_fp16 = const()[name = string("blocks_17_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(918765760)))]; tensor blocks_17_attn_ln_bias_to_fp16 = const()[name = string("blocks_17_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(918768384)))]; fp16 var_3773_to_fp16 = const()[name = string("op_3773_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_3785_cast_fp16 = layer_norm(axes = var_3785_axes_0, beta = blocks_17_attn_ln_bias_to_fp16, epsilon = var_3773_to_fp16, gamma = blocks_17_attn_ln_weight_to_fp16, x = x_309_cast_fp16)[name = string("op_3785_cast_fp16")]; tensor var_3796_to_fp16 = const()[name = string("op_3796_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(918771008)))]; tensor var_3797_to_fp16 = const()[name = string("op_3797_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(922047872)))]; tensor linear_136_cast_fp16 = linear(bias = var_3797_to_fp16, weight = var_3796_to_fp16, x = var_3785_cast_fp16)[name = string("linear_136_cast_fp16")]; tensor var_3800_to_fp16 = const()[name = string("op_3800_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(922050496)))]; tensor linear_137_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3800_to_fp16, x = var_3785_cast_fp16)[name = string("linear_137_cast_fp16")]; tensor var_3804_to_fp16 = const()[name = string("op_3804_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(925327360)))]; tensor var_3805_to_fp16 = const()[name = string("op_3805_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(928604224)))]; tensor linear_138_cast_fp16 = linear(bias = var_3805_to_fp16, weight = var_3804_to_fp16, x = var_3785_cast_fp16)[name = string("linear_138_cast_fp16")]; tensor var_3807_shape_cast_fp16 = shape(x = linear_136_cast_fp16)[name = string("op_3807_shape_cast_fp16")]; int32 gather_206_axis_0 = const()[name = string("gather_206_axis_0"), val = int32(0)]; int32 gather_206_batch_dims_0 = const()[name = string("gather_206_batch_dims_0"), val = int32(0)]; bool gather_206_validate_indices_0 = const()[name = string("gather_206_validate_indices_0"), val = bool(false)]; string var_3807_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3807_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_206_to_uint16 = const()[name = string("select_206_to_uint16"), val = uint16(1)]; tensor var_3807_shape_cast_fp16_to_uint16 = cast(dtype = var_3807_shape_cast_fp16_to_uint16_dtype_0, x = var_3807_shape_cast_fp16)[name = string("cast_356")]; uint16 gather_206_cast_uint16 = gather(axis = gather_206_axis_0, batch_dims = gather_206_batch_dims_0, indices = select_206_to_uint16, validate_indices = gather_206_validate_indices_0, x = var_3807_shape_cast_fp16_to_uint16)[name = string("gather_206_cast_uint16")]; string gather_206_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_206_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_206_cast_uint16_to_int32 = cast(dtype = gather_206_cast_uint16_to_int32_dtype_0, x = gather_206_cast_uint16)[name = string("cast_355")]; int32 end_step_37 = add(x = offset, y = gather_206_cast_uint16_to_int32)[name = string("end_step_37")]; tensor expand_dims_272 = const()[name = string("expand_dims_272"), val = tensor([0])]; tensor expand_dims_274 = const()[name = string("expand_dims_274"), val = tensor([0])]; tensor expand_dims_275_axes_0 = const()[name = string("expand_dims_275_axes_0"), val = tensor([0])]; tensor expand_dims_275 = expand_dims(axes = expand_dims_275_axes_0, x = end_step_37)[name = string("expand_dims_275")]; tensor concat_378_values0_0 = const()[name = string("concat_378_values0_0"), val = tensor([17])]; int32 concat_378_axis_0 = const()[name = string("concat_378_axis_0"), val = int32(0)]; bool concat_378_interleave_0 = const()[name = string("concat_378_interleave_0"), val = bool(false)]; tensor concat_378 = concat(axis = concat_378_axis_0, interleave = concat_378_interleave_0, values = (concat_378_values0_0, expand_dims_272, expand_dims_1, expand_dims_274))[name = string("concat_378")]; tensor concat_379_values0_0 = const()[name = string("concat_379_values0_0"), val = tensor([0])]; tensor concat_379_values1_0 = const()[name = string("concat_379_values1_0"), val = tensor([0])]; tensor concat_379_values3_0 = const()[name = string("concat_379_values3_0"), val = tensor([0])]; int32 concat_379_axis_0 = const()[name = string("concat_379_axis_0"), val = int32(0)]; bool concat_379_interleave_0 = const()[name = string("concat_379_interleave_0"), val = bool(false)]; tensor concat_379 = concat(axis = concat_379_axis_0, interleave = concat_379_interleave_0, values = (concat_379_values0_0, concat_379_values1_0, expand_dims_275, concat_379_values3_0))[name = string("concat_379")]; tensor k_cache1_internal_tensor_assign_18_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_18_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_18_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_378, begin_mask = k_cache1_internal_tensor_assign_18_begin_mask_0, end = concat_379, end_mask = k_cache1_internal_tensor_assign_18_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_18_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_18_stride_0, update = linear_137_cast_fp16, x = coreml_update_state_96)[name = string("k_cache1_internal_tensor_assign_18_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_18_cast_fp16, input = k_cache1)[name = string("coreml_update_state_98_write_state")]; tensor coreml_update_state_98 = read_state(input = k_cache1)[name = string("coreml_update_state_98")]; tensor v_cache1_internal_tensor_assign_18_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_18_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_18_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_378, begin_mask = v_cache1_internal_tensor_assign_18_begin_mask_0, end = concat_379, end_mask = v_cache1_internal_tensor_assign_18_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_18_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_18_stride_0, update = linear_138_cast_fp16, x = coreml_update_state_97)[name = string("v_cache1_internal_tensor_assign_18_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_18_cast_fp16, input = v_cache1)[name = string("coreml_update_state_99_write_state")]; tensor coreml_update_state_99 = read_state(input = v_cache1)[name = string("coreml_update_state_99")]; int32 concat_384_values0_0 = const()[name = string("concat_384_values0_0"), val = int32(1)]; int32 concat_384_values2_0 = const()[name = string("concat_384_values2_0"), val = int32(1280)]; int32 concat_384_axis_0 = const()[name = string("concat_384_axis_0"), val = int32(0)]; bool concat_384_interleave_0 = const()[name = string("concat_384_interleave_0"), val = bool(false)]; tensor concat_384 = concat(axis = concat_384_axis_0, interleave = concat_384_interleave_0, values = (concat_384_values0_0, end_step_37, concat_384_values2_0))[name = string("concat_384")]; tensor var_3823_begin_0 = const()[name = string("op_3823_begin_0"), val = tensor([0, 0, 0])]; tensor var_3823_end_mask_0 = const()[name = string("op_3823_end_mask_0"), val = tensor([true, false, true])]; tensor var_3823_cast_fp16 = slice_by_index(begin = var_3823_begin_0, end = concat_384, end_mask = var_3823_end_mask_0, x = k_cache_69_cast_fp16)[name = string("op_3823_cast_fp16")]; tensor var_3826_begin_0 = const()[name = string("op_3826_begin_0"), val = tensor([0, 0, 0])]; tensor var_3826_end_mask_0 = const()[name = string("op_3826_end_mask_0"), val = tensor([true, false, true])]; tensor var_3826_cast_fp16 = slice_by_index(begin = var_3826_begin_0, end = concat_384, end_mask = var_3826_end_mask_0, x = v_cache_69_cast_fp16)[name = string("op_3826_cast_fp16")]; tensor concat_386x = const()[name = string("concat_386x"), val = tensor([1, -1, 20, 64])]; tensor var_3836_cast_fp16 = reshape(shape = concat_386x, x = linear_136_cast_fp16)[name = string("op_3836_cast_fp16")]; tensor const_228_to_fp16 = const()[name = string("const_228_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_139_cast_fp16 = mul(x = var_3836_cast_fp16, y = const_228_to_fp16)[name = string("q_139_cast_fp16")]; tensor concat_387x = const()[name = string("concat_387x"), val = tensor([1, -1, 20, 64])]; tensor var_3843_cast_fp16 = reshape(shape = concat_387x, x = var_3823_cast_fp16)[name = string("op_3843_cast_fp16")]; tensor const_229_to_fp16 = const()[name = string("const_229_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_175_cast_fp16 = mul(x = var_3843_cast_fp16, y = const_229_to_fp16)[name = string("k_175_cast_fp16")]; tensor concat_388x = const()[name = string("concat_388x"), val = tensor([1, -1, 20, 64])]; tensor var_3850_cast_fp16 = reshape(shape = concat_388x, x = var_3826_cast_fp16)[name = string("op_3850_cast_fp16")]; tensor var_3851 = const()[name = string("op_3851"), val = tensor([0, 2, 1, 3])]; bool qk_103_transpose_x_0 = const()[name = string("qk_103_transpose_x_0"), val = bool(false)]; bool qk_103_transpose_y_0 = const()[name = string("qk_103_transpose_y_0"), val = bool(false)]; tensor transpose_325_perm_0 = const()[name = string("transpose_325_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_326_perm_0 = const()[name = string("transpose_326_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_326 = transpose(perm = transpose_326_perm_0, x = k_175_cast_fp16)[name = string("transpose_502")]; tensor transpose_325 = transpose(perm = transpose_325_perm_0, x = q_139_cast_fp16)[name = string("transpose_503")]; tensor qk_103_cast_fp16 = matmul(transpose_x = qk_103_transpose_x_0, transpose_y = qk_103_transpose_y_0, x = transpose_325, y = transpose_326)[name = string("qk_103_cast_fp16")]; int32 concat_389_values1_0 = const()[name = string("concat_389_values1_0"), val = int32(448)]; int32 concat_389_axis_0 = const()[name = string("concat_389_axis_0"), val = int32(0)]; bool concat_389_interleave_0 = const()[name = string("concat_389_interleave_0"), val = bool(false)]; tensor concat_389 = concat(axis = concat_389_axis_0, interleave = concat_389_interleave_0, values = (gather_206_cast_uint16_to_int32, concat_389_values1_0))[name = string("concat_389")]; tensor var_3854_begin_0 = const()[name = string("op_3854_begin_0"), val = tensor([0, 0])]; tensor var_3854_end_mask_0 = const()[name = string("op_3854_end_mask_0"), val = tensor([false, true])]; tensor var_3854_cast_fp16 = slice_by_index(begin = var_3854_begin_0, end = concat_389, end_mask = var_3854_end_mask_0, x = mask_to_fp16)[name = string("op_3854_cast_fp16")]; int32 concat_390_values0_0 = const()[name = string("concat_390_values0_0"), val = int32(0)]; int32 concat_390_axis_0 = const()[name = string("concat_390_axis_0"), val = int32(0)]; bool concat_390_interleave_0 = const()[name = string("concat_390_interleave_0"), val = bool(false)]; tensor concat_390 = concat(axis = concat_390_axis_0, interleave = concat_390_interleave_0, values = (concat_390_values0_0, gather_206_cast_uint16_to_int32))[name = string("concat_390")]; tensor var_3855_begin_0 = const()[name = string("op_3855_begin_0"), val = tensor([0, 0])]; tensor var_3855_end_mask_0 = const()[name = string("op_3855_end_mask_0"), val = tensor([true, false])]; tensor var_3855_cast_fp16 = slice_by_index(begin = var_3855_begin_0, end = concat_390, end_mask = var_3855_end_mask_0, x = var_3854_cast_fp16)[name = string("op_3855_cast_fp16")]; tensor qk_105_cast_fp16 = add(x = qk_103_cast_fp16, y = var_3855_cast_fp16)[name = string("qk_105_cast_fp16")]; tensor var_3858_cast_fp16 = softmax(axis = var_3767, x = qk_105_cast_fp16)[name = string("op_3858_cast_fp16")]; bool var_3860_transpose_x_0 = const()[name = string("op_3860_transpose_x_0"), val = bool(false)]; bool var_3860_transpose_y_0 = const()[name = string("op_3860_transpose_y_0"), val = bool(false)]; tensor v_175_cast_fp16 = transpose(perm = var_3851, x = var_3850_cast_fp16)[name = string("transpose_504")]; tensor var_3860_cast_fp16 = matmul(transpose_x = var_3860_transpose_x_0, transpose_y = var_3860_transpose_y_0, x = var_3858_cast_fp16, y = v_175_cast_fp16)[name = string("op_3860_cast_fp16")]; tensor var_3861 = const()[name = string("op_3861"), val = tensor([0, 2, 1, 3])]; tensor concat_391x = const()[name = string("concat_391x"), val = tensor([1, -1, 1280])]; tensor var_3862_cast_fp16 = transpose(perm = var_3861, x = var_3860_cast_fp16)[name = string("transpose_501")]; tensor x_313_cast_fp16 = reshape(shape = concat_391x, x = var_3862_cast_fp16)[name = string("x_313_cast_fp16")]; tensor var_3866_to_fp16 = const()[name = string("op_3866_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(928606848)))]; tensor var_3867_to_fp16 = const()[name = string("op_3867_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(931883712)))]; tensor linear_139_cast_fp16 = linear(bias = var_3867_to_fp16, weight = var_3866_to_fp16, x = x_313_cast_fp16)[name = string("linear_139_cast_fp16")]; tensor x_315_cast_fp16 = add(x = x_309_cast_fp16, y = linear_139_cast_fp16)[name = string("x_315_cast_fp16")]; tensor var_3874_axes_0 = const()[name = string("op_3874_axes_0"), val = tensor([-1])]; tensor blocks_17_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_17_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(931886336)))]; tensor blocks_17_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_17_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(931888960)))]; tensor var_3874_cast_fp16 = layer_norm(axes = var_3874_axes_0, beta = blocks_17_cross_attn_ln_bias_to_fp16, epsilon = var_3773_to_fp16, gamma = blocks_17_cross_attn_ln_weight_to_fp16, x = x_315_cast_fp16)[name = string("op_3874_cast_fp16")]; tensor var_3883_to_fp16 = const()[name = string("op_3883_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(931891584)))]; tensor var_3884_to_fp16 = const()[name = string("op_3884_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(935168448)))]; tensor linear_140_cast_fp16 = linear(bias = var_3884_to_fp16, weight = var_3883_to_fp16, x = var_3874_cast_fp16)[name = string("linear_140_cast_fp16")]; tensor concat_392 = const()[name = string("concat_392"), val = tensor([0, 0, 0])]; tensor concat_393 = const()[name = string("concat_393"), val = tensor([0, 1500, 0])]; tensor k_177_internal_tensor_assign_1_stride_0 = const()[name = string("k_177_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_177_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_177_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_177_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_177_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_177_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_177_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_177_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_392, begin_mask = k_177_internal_tensor_assign_1_begin_mask_0, end = concat_393, end_mask = k_177_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_177_internal_tensor_assign_1_squeeze_mask_0, stride = k_177_internal_tensor_assign_1_stride_0, update = k_cache_71_cast_fp16, x = k_7_to_fp16)[name = string("k_177_internal_tensor_assign_1_cast_fp16")]; tensor concat_394 = const()[name = string("concat_394"), val = tensor([0, 0, 0])]; tensor concat_395 = const()[name = string("concat_395"), val = tensor([0, 1500, 0])]; tensor v_177_internal_tensor_assign_1_stride_0 = const()[name = string("v_177_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_177_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_177_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_177_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_177_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_177_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_177_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_177_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_394, begin_mask = v_177_internal_tensor_assign_1_begin_mask_0, end = concat_395, end_mask = v_177_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_177_internal_tensor_assign_1_squeeze_mask_0, stride = v_177_internal_tensor_assign_1_stride_0, update = v_cache_71_cast_fp16, x = k_7_to_fp16)[name = string("v_177_internal_tensor_assign_1_cast_fp16")]; tensor concat_396x = const()[name = string("concat_396x"), val = tensor([1, -1, 20, 64])]; tensor var_3904_cast_fp16 = reshape(shape = concat_396x, x = linear_140_cast_fp16)[name = string("op_3904_cast_fp16")]; tensor const_230_to_fp16 = const()[name = string("const_230_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_143_cast_fp16 = mul(x = var_3904_cast_fp16, y = const_230_to_fp16)[name = string("q_143_cast_fp16")]; tensor var_3910 = const()[name = string("op_3910"), val = tensor([1, 1500, 20, -1])]; tensor var_3911_cast_fp16 = reshape(shape = var_3910, x = k_177_internal_tensor_assign_1_cast_fp16)[name = string("op_3911_cast_fp16")]; tensor const_231_to_fp16 = const()[name = string("const_231_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_179_cast_fp16 = mul(x = var_3911_cast_fp16, y = const_231_to_fp16)[name = string("k_179_cast_fp16")]; tensor var_3917 = const()[name = string("op_3917"), val = tensor([1, 1500, 20, -1])]; tensor var_3918_cast_fp16 = reshape(shape = var_3917, x = v_177_internal_tensor_assign_1_cast_fp16)[name = string("op_3918_cast_fp16")]; tensor var_3919 = const()[name = string("op_3919"), val = tensor([0, 2, 1, 3])]; bool qk_107_transpose_x_0 = const()[name = string("qk_107_transpose_x_0"), val = bool(false)]; bool qk_107_transpose_y_0 = const()[name = string("qk_107_transpose_y_0"), val = bool(false)]; tensor transpose_327_perm_0 = const()[name = string("transpose_327_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_328_perm_0 = const()[name = string("transpose_328_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_328 = transpose(perm = transpose_328_perm_0, x = k_179_cast_fp16)[name = string("transpose_498")]; tensor transpose_327 = transpose(perm = transpose_327_perm_0, x = q_143_cast_fp16)[name = string("transpose_499")]; tensor qk_107_cast_fp16 = matmul(transpose_x = qk_107_transpose_x_0, transpose_y = qk_107_transpose_y_0, x = transpose_327, y = transpose_328)[name = string("qk_107_cast_fp16")]; tensor var_3923_cast_fp16 = softmax(axis = var_3767, x = qk_107_cast_fp16)[name = string("op_3923_cast_fp16")]; bool var_3925_transpose_x_0 = const()[name = string("op_3925_transpose_x_0"), val = bool(false)]; bool var_3925_transpose_y_0 = const()[name = string("op_3925_transpose_y_0"), val = bool(false)]; tensor v_179_cast_fp16 = transpose(perm = var_3919, x = var_3918_cast_fp16)[name = string("transpose_500")]; tensor var_3925_cast_fp16 = matmul(transpose_x = var_3925_transpose_x_0, transpose_y = var_3925_transpose_y_0, x = var_3923_cast_fp16, y = v_179_cast_fp16)[name = string("op_3925_cast_fp16")]; tensor var_3926 = const()[name = string("op_3926"), val = tensor([0, 2, 1, 3])]; tensor concat_397x = const()[name = string("concat_397x"), val = tensor([1, -1, 1280])]; tensor var_3927_cast_fp16 = transpose(perm = var_3926, x = var_3925_cast_fp16)[name = string("transpose_497")]; tensor x_319_cast_fp16 = reshape(shape = concat_397x, x = var_3927_cast_fp16)[name = string("x_319_cast_fp16")]; tensor var_3931_to_fp16 = const()[name = string("op_3931_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(935171072)))]; tensor var_3932_to_fp16 = const()[name = string("op_3932_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(938447936)))]; tensor linear_141_cast_fp16 = linear(bias = var_3932_to_fp16, weight = var_3931_to_fp16, x = x_319_cast_fp16)[name = string("linear_141_cast_fp16")]; tensor x_321_cast_fp16 = add(x = x_315_cast_fp16, y = linear_141_cast_fp16)[name = string("x_321_cast_fp16")]; tensor var_3939_axes_0 = const()[name = string("op_3939_axes_0"), val = tensor([-1])]; tensor blocks_17_mlp_ln_weight_to_fp16 = const()[name = string("blocks_17_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(938450560)))]; tensor blocks_17_mlp_ln_bias_to_fp16 = const()[name = string("blocks_17_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(938453184)))]; tensor var_3939_cast_fp16 = layer_norm(axes = var_3939_axes_0, beta = blocks_17_mlp_ln_bias_to_fp16, epsilon = var_3773_to_fp16, gamma = blocks_17_mlp_ln_weight_to_fp16, x = x_321_cast_fp16)[name = string("op_3939_cast_fp16")]; tensor var_3948_to_fp16 = const()[name = string("op_3948_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(938455808)))]; tensor var_3949_to_fp16 = const()[name = string("op_3949_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(951563072)))]; tensor linear_142_cast_fp16 = linear(bias = var_3949_to_fp16, weight = var_3948_to_fp16, x = var_3939_cast_fp16)[name = string("linear_142_cast_fp16")]; string x_325_mode_0 = const()[name = string("x_325_mode_0"), val = string("EXACT")]; tensor x_325_cast_fp16 = gelu(mode = x_325_mode_0, x = linear_142_cast_fp16)[name = string("x_325_cast_fp16")]; tensor var_3954_to_fp16 = const()[name = string("op_3954_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(951573376)))]; tensor var_3955_to_fp16 = const()[name = string("op_3955_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(964680640)))]; tensor linear_143_cast_fp16 = linear(bias = var_3955_to_fp16, weight = var_3954_to_fp16, x = x_325_cast_fp16)[name = string("linear_143_cast_fp16")]; tensor x_327_cast_fp16 = add(x = x_321_cast_fp16, y = linear_143_cast_fp16)[name = string("x_327_cast_fp16")]; tensor k_cache_73_begin_0 = const()[name = string("k_cache_73_begin_0"), val = tensor([18, 0, 0, 0])]; tensor k_cache_73_end_0 = const()[name = string("k_cache_73_end_0"), val = tensor([19, 1, 448, 1280])]; tensor k_cache_73_end_mask_0 = const()[name = string("k_cache_73_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_73_squeeze_mask_0 = const()[name = string("k_cache_73_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_73_cast_fp16 = slice_by_index(begin = k_cache_73_begin_0, end = k_cache_73_end_0, end_mask = k_cache_73_end_mask_0, squeeze_mask = k_cache_73_squeeze_mask_0, x = coreml_update_state_98)[name = string("k_cache_73_cast_fp16")]; tensor v_cache_73_begin_0 = const()[name = string("v_cache_73_begin_0"), val = tensor([18, 0, 0, 0])]; tensor v_cache_73_end_0 = const()[name = string("v_cache_73_end_0"), val = tensor([19, 1, 448, 1280])]; tensor v_cache_73_end_mask_0 = const()[name = string("v_cache_73_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_73_squeeze_mask_0 = const()[name = string("v_cache_73_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_73_cast_fp16 = slice_by_index(begin = v_cache_73_begin_0, end = v_cache_73_end_0, end_mask = v_cache_73_end_mask_0, squeeze_mask = v_cache_73_squeeze_mask_0, x = coreml_update_state_99)[name = string("v_cache_73_cast_fp16")]; tensor k_cache_75_begin_0 = const()[name = string("k_cache_75_begin_0"), val = tensor([18, 0, 0, 0])]; tensor k_cache_75_end_0 = const()[name = string("k_cache_75_end_0"), val = tensor([19, 1, 1500, 1280])]; tensor k_cache_75_end_mask_0 = const()[name = string("k_cache_75_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_75_squeeze_mask_0 = const()[name = string("k_cache_75_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_75_cast_fp16 = slice_by_index(begin = k_cache_75_begin_0, end = k_cache_75_end_0, end_mask = k_cache_75_end_mask_0, squeeze_mask = k_cache_75_squeeze_mask_0, x = read_state_2)[name = string("k_cache_75_cast_fp16")]; tensor v_cache_75_begin_0 = const()[name = string("v_cache_75_begin_0"), val = tensor([18, 0, 0, 0])]; tensor v_cache_75_end_0 = const()[name = string("v_cache_75_end_0"), val = tensor([19, 1, 1500, 1280])]; tensor v_cache_75_end_mask_0 = const()[name = string("v_cache_75_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_75_squeeze_mask_0 = const()[name = string("v_cache_75_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_75_cast_fp16 = slice_by_index(begin = v_cache_75_begin_0, end = v_cache_75_end_0, end_mask = v_cache_75_end_mask_0, squeeze_mask = v_cache_75_squeeze_mask_0, x = read_state_3)[name = string("v_cache_75_cast_fp16")]; int32 var_3978 = const()[name = string("op_3978"), val = int32(-1)]; tensor var_3996_axes_0 = const()[name = string("op_3996_axes_0"), val = tensor([-1])]; tensor blocks_18_attn_ln_weight_to_fp16 = const()[name = string("blocks_18_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(964683264)))]; tensor blocks_18_attn_ln_bias_to_fp16 = const()[name = string("blocks_18_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(964685888)))]; fp16 var_3984_to_fp16 = const()[name = string("op_3984_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_3996_cast_fp16 = layer_norm(axes = var_3996_axes_0, beta = blocks_18_attn_ln_bias_to_fp16, epsilon = var_3984_to_fp16, gamma = blocks_18_attn_ln_weight_to_fp16, x = x_327_cast_fp16)[name = string("op_3996_cast_fp16")]; tensor var_4007_to_fp16 = const()[name = string("op_4007_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(964688512)))]; tensor var_4008_to_fp16 = const()[name = string("op_4008_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(967965376)))]; tensor linear_144_cast_fp16 = linear(bias = var_4008_to_fp16, weight = var_4007_to_fp16, x = var_3996_cast_fp16)[name = string("linear_144_cast_fp16")]; tensor var_4011_to_fp16 = const()[name = string("op_4011_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(967968000)))]; tensor linear_145_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_4011_to_fp16, x = var_3996_cast_fp16)[name = string("linear_145_cast_fp16")]; tensor var_4015_to_fp16 = const()[name = string("op_4015_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(971244864)))]; tensor var_4016_to_fp16 = const()[name = string("op_4016_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(974521728)))]; tensor linear_146_cast_fp16 = linear(bias = var_4016_to_fp16, weight = var_4015_to_fp16, x = var_3996_cast_fp16)[name = string("linear_146_cast_fp16")]; tensor var_4018_shape_cast_fp16 = shape(x = linear_144_cast_fp16)[name = string("op_4018_shape_cast_fp16")]; int32 gather_218_axis_0 = const()[name = string("gather_218_axis_0"), val = int32(0)]; int32 gather_218_batch_dims_0 = const()[name = string("gather_218_batch_dims_0"), val = int32(0)]; bool gather_218_validate_indices_0 = const()[name = string("gather_218_validate_indices_0"), val = bool(false)]; string var_4018_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4018_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_218_to_uint16 = const()[name = string("select_218_to_uint16"), val = uint16(1)]; tensor var_4018_shape_cast_fp16_to_uint16 = cast(dtype = var_4018_shape_cast_fp16_to_uint16_dtype_0, x = var_4018_shape_cast_fp16)[name = string("cast_354")]; uint16 gather_218_cast_uint16 = gather(axis = gather_218_axis_0, batch_dims = gather_218_batch_dims_0, indices = select_218_to_uint16, validate_indices = gather_218_validate_indices_0, x = var_4018_shape_cast_fp16_to_uint16)[name = string("gather_218_cast_uint16")]; string gather_218_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_218_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_218_cast_uint16_to_int32 = cast(dtype = gather_218_cast_uint16_to_int32_dtype_0, x = gather_218_cast_uint16)[name = string("cast_353")]; int32 end_step_39 = add(x = offset, y = gather_218_cast_uint16_to_int32)[name = string("end_step_39")]; tensor expand_dims_288 = const()[name = string("expand_dims_288"), val = tensor([0])]; tensor expand_dims_290 = const()[name = string("expand_dims_290"), val = tensor([0])]; tensor expand_dims_291_axes_0 = const()[name = string("expand_dims_291_axes_0"), val = tensor([0])]; tensor expand_dims_291 = expand_dims(axes = expand_dims_291_axes_0, x = end_step_39)[name = string("expand_dims_291")]; tensor concat_400_values0_0 = const()[name = string("concat_400_values0_0"), val = tensor([18])]; int32 concat_400_axis_0 = const()[name = string("concat_400_axis_0"), val = int32(0)]; bool concat_400_interleave_0 = const()[name = string("concat_400_interleave_0"), val = bool(false)]; tensor concat_400 = concat(axis = concat_400_axis_0, interleave = concat_400_interleave_0, values = (concat_400_values0_0, expand_dims_288, expand_dims_1, expand_dims_290))[name = string("concat_400")]; tensor concat_401_values0_0 = const()[name = string("concat_401_values0_0"), val = tensor([0])]; tensor concat_401_values1_0 = const()[name = string("concat_401_values1_0"), val = tensor([0])]; tensor concat_401_values3_0 = const()[name = string("concat_401_values3_0"), val = tensor([0])]; int32 concat_401_axis_0 = const()[name = string("concat_401_axis_0"), val = int32(0)]; bool concat_401_interleave_0 = const()[name = string("concat_401_interleave_0"), val = bool(false)]; tensor concat_401 = concat(axis = concat_401_axis_0, interleave = concat_401_interleave_0, values = (concat_401_values0_0, concat_401_values1_0, expand_dims_291, concat_401_values3_0))[name = string("concat_401")]; tensor k_cache1_internal_tensor_assign_19_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_19_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_19_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_400, begin_mask = k_cache1_internal_tensor_assign_19_begin_mask_0, end = concat_401, end_mask = k_cache1_internal_tensor_assign_19_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_19_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_19_stride_0, update = linear_145_cast_fp16, x = coreml_update_state_98)[name = string("k_cache1_internal_tensor_assign_19_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_19_cast_fp16, input = k_cache1)[name = string("coreml_update_state_100_write_state")]; tensor coreml_update_state_100 = read_state(input = k_cache1)[name = string("coreml_update_state_100")]; tensor v_cache1_internal_tensor_assign_19_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_19_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_19_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_400, begin_mask = v_cache1_internal_tensor_assign_19_begin_mask_0, end = concat_401, end_mask = v_cache1_internal_tensor_assign_19_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_19_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_19_stride_0, update = linear_146_cast_fp16, x = coreml_update_state_99)[name = string("v_cache1_internal_tensor_assign_19_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_19_cast_fp16, input = v_cache1)[name = string("coreml_update_state_101_write_state")]; tensor coreml_update_state_101 = read_state(input = v_cache1)[name = string("coreml_update_state_101")]; int32 concat_406_values0_0 = const()[name = string("concat_406_values0_0"), val = int32(1)]; int32 concat_406_values2_0 = const()[name = string("concat_406_values2_0"), val = int32(1280)]; int32 concat_406_axis_0 = const()[name = string("concat_406_axis_0"), val = int32(0)]; bool concat_406_interleave_0 = const()[name = string("concat_406_interleave_0"), val = bool(false)]; tensor concat_406 = concat(axis = concat_406_axis_0, interleave = concat_406_interleave_0, values = (concat_406_values0_0, end_step_39, concat_406_values2_0))[name = string("concat_406")]; tensor var_4034_begin_0 = const()[name = string("op_4034_begin_0"), val = tensor([0, 0, 0])]; tensor var_4034_end_mask_0 = const()[name = string("op_4034_end_mask_0"), val = tensor([true, false, true])]; tensor var_4034_cast_fp16 = slice_by_index(begin = var_4034_begin_0, end = concat_406, end_mask = var_4034_end_mask_0, x = k_cache_73_cast_fp16)[name = string("op_4034_cast_fp16")]; tensor var_4037_begin_0 = const()[name = string("op_4037_begin_0"), val = tensor([0, 0, 0])]; tensor var_4037_end_mask_0 = const()[name = string("op_4037_end_mask_0"), val = tensor([true, false, true])]; tensor var_4037_cast_fp16 = slice_by_index(begin = var_4037_begin_0, end = concat_406, end_mask = var_4037_end_mask_0, x = v_cache_73_cast_fp16)[name = string("op_4037_cast_fp16")]; tensor concat_408x = const()[name = string("concat_408x"), val = tensor([1, -1, 20, 64])]; tensor var_4047_cast_fp16 = reshape(shape = concat_408x, x = linear_144_cast_fp16)[name = string("op_4047_cast_fp16")]; tensor const_232_to_fp16 = const()[name = string("const_232_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_147_cast_fp16 = mul(x = var_4047_cast_fp16, y = const_232_to_fp16)[name = string("q_147_cast_fp16")]; tensor concat_409x = const()[name = string("concat_409x"), val = tensor([1, -1, 20, 64])]; tensor var_4054_cast_fp16 = reshape(shape = concat_409x, x = var_4034_cast_fp16)[name = string("op_4054_cast_fp16")]; tensor const_233_to_fp16 = const()[name = string("const_233_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_185_cast_fp16 = mul(x = var_4054_cast_fp16, y = const_233_to_fp16)[name = string("k_185_cast_fp16")]; tensor concat_410x = const()[name = string("concat_410x"), val = tensor([1, -1, 20, 64])]; tensor var_4061_cast_fp16 = reshape(shape = concat_410x, x = var_4037_cast_fp16)[name = string("op_4061_cast_fp16")]; tensor var_4062 = const()[name = string("op_4062"), val = tensor([0, 2, 1, 3])]; bool qk_109_transpose_x_0 = const()[name = string("qk_109_transpose_x_0"), val = bool(false)]; bool qk_109_transpose_y_0 = const()[name = string("qk_109_transpose_y_0"), val = bool(false)]; tensor transpose_329_perm_0 = const()[name = string("transpose_329_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_330_perm_0 = const()[name = string("transpose_330_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_330 = transpose(perm = transpose_330_perm_0, x = k_185_cast_fp16)[name = string("transpose_494")]; tensor transpose_329 = transpose(perm = transpose_329_perm_0, x = q_147_cast_fp16)[name = string("transpose_495")]; tensor qk_109_cast_fp16 = matmul(transpose_x = qk_109_transpose_x_0, transpose_y = qk_109_transpose_y_0, x = transpose_329, y = transpose_330)[name = string("qk_109_cast_fp16")]; int32 concat_411_values1_0 = const()[name = string("concat_411_values1_0"), val = int32(448)]; int32 concat_411_axis_0 = const()[name = string("concat_411_axis_0"), val = int32(0)]; bool concat_411_interleave_0 = const()[name = string("concat_411_interleave_0"), val = bool(false)]; tensor concat_411 = concat(axis = concat_411_axis_0, interleave = concat_411_interleave_0, values = (gather_218_cast_uint16_to_int32, concat_411_values1_0))[name = string("concat_411")]; tensor var_4065_begin_0 = const()[name = string("op_4065_begin_0"), val = tensor([0, 0])]; tensor var_4065_end_mask_0 = const()[name = string("op_4065_end_mask_0"), val = tensor([false, true])]; tensor var_4065_cast_fp16 = slice_by_index(begin = var_4065_begin_0, end = concat_411, end_mask = var_4065_end_mask_0, x = mask_to_fp16)[name = string("op_4065_cast_fp16")]; int32 concat_412_values0_0 = const()[name = string("concat_412_values0_0"), val = int32(0)]; int32 concat_412_axis_0 = const()[name = string("concat_412_axis_0"), val = int32(0)]; bool concat_412_interleave_0 = const()[name = string("concat_412_interleave_0"), val = bool(false)]; tensor concat_412 = concat(axis = concat_412_axis_0, interleave = concat_412_interleave_0, values = (concat_412_values0_0, gather_218_cast_uint16_to_int32))[name = string("concat_412")]; tensor var_4066_begin_0 = const()[name = string("op_4066_begin_0"), val = tensor([0, 0])]; tensor var_4066_end_mask_0 = const()[name = string("op_4066_end_mask_0"), val = tensor([true, false])]; tensor var_4066_cast_fp16 = slice_by_index(begin = var_4066_begin_0, end = concat_412, end_mask = var_4066_end_mask_0, x = var_4065_cast_fp16)[name = string("op_4066_cast_fp16")]; tensor qk_111_cast_fp16 = add(x = qk_109_cast_fp16, y = var_4066_cast_fp16)[name = string("qk_111_cast_fp16")]; tensor var_4069_cast_fp16 = softmax(axis = var_3978, x = qk_111_cast_fp16)[name = string("op_4069_cast_fp16")]; bool var_4071_transpose_x_0 = const()[name = string("op_4071_transpose_x_0"), val = bool(false)]; bool var_4071_transpose_y_0 = const()[name = string("op_4071_transpose_y_0"), val = bool(false)]; tensor v_185_cast_fp16 = transpose(perm = var_4062, x = var_4061_cast_fp16)[name = string("transpose_496")]; tensor var_4071_cast_fp16 = matmul(transpose_x = var_4071_transpose_x_0, transpose_y = var_4071_transpose_y_0, x = var_4069_cast_fp16, y = v_185_cast_fp16)[name = string("op_4071_cast_fp16")]; tensor var_4072 = const()[name = string("op_4072"), val = tensor([0, 2, 1, 3])]; tensor concat_413x = const()[name = string("concat_413x"), val = tensor([1, -1, 1280])]; tensor var_4073_cast_fp16 = transpose(perm = var_4072, x = var_4071_cast_fp16)[name = string("transpose_493")]; tensor x_331_cast_fp16 = reshape(shape = concat_413x, x = var_4073_cast_fp16)[name = string("x_331_cast_fp16")]; tensor var_4077_to_fp16 = const()[name = string("op_4077_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(974524352)))]; tensor var_4078_to_fp16 = const()[name = string("op_4078_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(977801216)))]; tensor linear_147_cast_fp16 = linear(bias = var_4078_to_fp16, weight = var_4077_to_fp16, x = x_331_cast_fp16)[name = string("linear_147_cast_fp16")]; tensor x_333_cast_fp16 = add(x = x_327_cast_fp16, y = linear_147_cast_fp16)[name = string("x_333_cast_fp16")]; tensor var_4085_axes_0 = const()[name = string("op_4085_axes_0"), val = tensor([-1])]; tensor blocks_18_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_18_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(977803840)))]; tensor blocks_18_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_18_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(977806464)))]; tensor var_4085_cast_fp16 = layer_norm(axes = var_4085_axes_0, beta = blocks_18_cross_attn_ln_bias_to_fp16, epsilon = var_3984_to_fp16, gamma = blocks_18_cross_attn_ln_weight_to_fp16, x = x_333_cast_fp16)[name = string("op_4085_cast_fp16")]; tensor var_4094_to_fp16 = const()[name = string("op_4094_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(977809088)))]; tensor var_4095_to_fp16 = const()[name = string("op_4095_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(981085952)))]; tensor linear_148_cast_fp16 = linear(bias = var_4095_to_fp16, weight = var_4094_to_fp16, x = var_4085_cast_fp16)[name = string("linear_148_cast_fp16")]; tensor concat_414 = const()[name = string("concat_414"), val = tensor([0, 0, 0])]; tensor concat_415 = const()[name = string("concat_415"), val = tensor([0, 1500, 0])]; tensor k_187_internal_tensor_assign_1_stride_0 = const()[name = string("k_187_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_187_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_187_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_187_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_187_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_187_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_187_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_187_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_414, begin_mask = k_187_internal_tensor_assign_1_begin_mask_0, end = concat_415, end_mask = k_187_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_187_internal_tensor_assign_1_squeeze_mask_0, stride = k_187_internal_tensor_assign_1_stride_0, update = k_cache_75_cast_fp16, x = k_7_to_fp16)[name = string("k_187_internal_tensor_assign_1_cast_fp16")]; tensor concat_416 = const()[name = string("concat_416"), val = tensor([0, 0, 0])]; tensor concat_417 = const()[name = string("concat_417"), val = tensor([0, 1500, 0])]; tensor v_187_internal_tensor_assign_1_stride_0 = const()[name = string("v_187_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_187_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_187_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_187_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_187_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_187_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_187_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_187_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_416, begin_mask = v_187_internal_tensor_assign_1_begin_mask_0, end = concat_417, end_mask = v_187_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_187_internal_tensor_assign_1_squeeze_mask_0, stride = v_187_internal_tensor_assign_1_stride_0, update = v_cache_75_cast_fp16, x = k_7_to_fp16)[name = string("v_187_internal_tensor_assign_1_cast_fp16")]; tensor concat_418x = const()[name = string("concat_418x"), val = tensor([1, -1, 20, 64])]; tensor var_4115_cast_fp16 = reshape(shape = concat_418x, x = linear_148_cast_fp16)[name = string("op_4115_cast_fp16")]; tensor const_234_to_fp16 = const()[name = string("const_234_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_151_cast_fp16 = mul(x = var_4115_cast_fp16, y = const_234_to_fp16)[name = string("q_151_cast_fp16")]; tensor var_4121 = const()[name = string("op_4121"), val = tensor([1, 1500, 20, -1])]; tensor var_4122_cast_fp16 = reshape(shape = var_4121, x = k_187_internal_tensor_assign_1_cast_fp16)[name = string("op_4122_cast_fp16")]; tensor const_235_to_fp16 = const()[name = string("const_235_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_189_cast_fp16 = mul(x = var_4122_cast_fp16, y = const_235_to_fp16)[name = string("k_189_cast_fp16")]; tensor var_4128 = const()[name = string("op_4128"), val = tensor([1, 1500, 20, -1])]; tensor var_4129_cast_fp16 = reshape(shape = var_4128, x = v_187_internal_tensor_assign_1_cast_fp16)[name = string("op_4129_cast_fp16")]; tensor var_4130 = const()[name = string("op_4130"), val = tensor([0, 2, 1, 3])]; bool qk_113_transpose_x_0 = const()[name = string("qk_113_transpose_x_0"), val = bool(false)]; bool qk_113_transpose_y_0 = const()[name = string("qk_113_transpose_y_0"), val = bool(false)]; tensor transpose_331_perm_0 = const()[name = string("transpose_331_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_332_perm_0 = const()[name = string("transpose_332_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_332 = transpose(perm = transpose_332_perm_0, x = k_189_cast_fp16)[name = string("transpose_490")]; tensor transpose_331 = transpose(perm = transpose_331_perm_0, x = q_151_cast_fp16)[name = string("transpose_491")]; tensor qk_113_cast_fp16 = matmul(transpose_x = qk_113_transpose_x_0, transpose_y = qk_113_transpose_y_0, x = transpose_331, y = transpose_332)[name = string("qk_113_cast_fp16")]; tensor var_4134_cast_fp16 = softmax(axis = var_3978, x = qk_113_cast_fp16)[name = string("op_4134_cast_fp16")]; bool var_4136_transpose_x_0 = const()[name = string("op_4136_transpose_x_0"), val = bool(false)]; bool var_4136_transpose_y_0 = const()[name = string("op_4136_transpose_y_0"), val = bool(false)]; tensor v_189_cast_fp16 = transpose(perm = var_4130, x = var_4129_cast_fp16)[name = string("transpose_492")]; tensor var_4136_cast_fp16 = matmul(transpose_x = var_4136_transpose_x_0, transpose_y = var_4136_transpose_y_0, x = var_4134_cast_fp16, y = v_189_cast_fp16)[name = string("op_4136_cast_fp16")]; tensor var_4137 = const()[name = string("op_4137"), val = tensor([0, 2, 1, 3])]; tensor concat_419x = const()[name = string("concat_419x"), val = tensor([1, -1, 1280])]; tensor var_4138_cast_fp16 = transpose(perm = var_4137, x = var_4136_cast_fp16)[name = string("transpose_489")]; tensor x_337_cast_fp16 = reshape(shape = concat_419x, x = var_4138_cast_fp16)[name = string("x_337_cast_fp16")]; tensor var_4142_to_fp16 = const()[name = string("op_4142_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(981088576)))]; tensor var_4143_to_fp16 = const()[name = string("op_4143_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(984365440)))]; tensor linear_149_cast_fp16 = linear(bias = var_4143_to_fp16, weight = var_4142_to_fp16, x = x_337_cast_fp16)[name = string("linear_149_cast_fp16")]; tensor x_339_cast_fp16 = add(x = x_333_cast_fp16, y = linear_149_cast_fp16)[name = string("x_339_cast_fp16")]; tensor var_4150_axes_0 = const()[name = string("op_4150_axes_0"), val = tensor([-1])]; tensor blocks_18_mlp_ln_weight_to_fp16 = const()[name = string("blocks_18_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(984368064)))]; tensor blocks_18_mlp_ln_bias_to_fp16 = const()[name = string("blocks_18_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(984370688)))]; tensor var_4150_cast_fp16 = layer_norm(axes = var_4150_axes_0, beta = blocks_18_mlp_ln_bias_to_fp16, epsilon = var_3984_to_fp16, gamma = blocks_18_mlp_ln_weight_to_fp16, x = x_339_cast_fp16)[name = string("op_4150_cast_fp16")]; tensor var_4159_to_fp16 = const()[name = string("op_4159_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(984373312)))]; tensor var_4160_to_fp16 = const()[name = string("op_4160_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(997480576)))]; tensor linear_150_cast_fp16 = linear(bias = var_4160_to_fp16, weight = var_4159_to_fp16, x = var_4150_cast_fp16)[name = string("linear_150_cast_fp16")]; string x_343_mode_0 = const()[name = string("x_343_mode_0"), val = string("EXACT")]; tensor x_343_cast_fp16 = gelu(mode = x_343_mode_0, x = linear_150_cast_fp16)[name = string("x_343_cast_fp16")]; tensor var_4165_to_fp16 = const()[name = string("op_4165_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(997490880)))]; tensor var_4166_to_fp16 = const()[name = string("op_4166_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1010598144)))]; tensor linear_151_cast_fp16 = linear(bias = var_4166_to_fp16, weight = var_4165_to_fp16, x = x_343_cast_fp16)[name = string("linear_151_cast_fp16")]; tensor x_345_cast_fp16 = add(x = x_339_cast_fp16, y = linear_151_cast_fp16)[name = string("x_345_cast_fp16")]; tensor k_cache_77_begin_0 = const()[name = string("k_cache_77_begin_0"), val = tensor([19, 0, 0, 0])]; tensor k_cache_77_end_0 = const()[name = string("k_cache_77_end_0"), val = tensor([20, 1, 448, 1280])]; tensor k_cache_77_end_mask_0 = const()[name = string("k_cache_77_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_77_squeeze_mask_0 = const()[name = string("k_cache_77_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_77_cast_fp16 = slice_by_index(begin = k_cache_77_begin_0, end = k_cache_77_end_0, end_mask = k_cache_77_end_mask_0, squeeze_mask = k_cache_77_squeeze_mask_0, x = coreml_update_state_100)[name = string("k_cache_77_cast_fp16")]; tensor v_cache_77_begin_0 = const()[name = string("v_cache_77_begin_0"), val = tensor([19, 0, 0, 0])]; tensor v_cache_77_end_0 = const()[name = string("v_cache_77_end_0"), val = tensor([20, 1, 448, 1280])]; tensor v_cache_77_end_mask_0 = const()[name = string("v_cache_77_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_77_squeeze_mask_0 = const()[name = string("v_cache_77_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_77_cast_fp16 = slice_by_index(begin = v_cache_77_begin_0, end = v_cache_77_end_0, end_mask = v_cache_77_end_mask_0, squeeze_mask = v_cache_77_squeeze_mask_0, x = coreml_update_state_101)[name = string("v_cache_77_cast_fp16")]; tensor k_cache_79_begin_0 = const()[name = string("k_cache_79_begin_0"), val = tensor([19, 0, 0, 0])]; tensor k_cache_79_end_0 = const()[name = string("k_cache_79_end_0"), val = tensor([20, 1, 1500, 1280])]; tensor k_cache_79_end_mask_0 = const()[name = string("k_cache_79_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_79_squeeze_mask_0 = const()[name = string("k_cache_79_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_79_cast_fp16 = slice_by_index(begin = k_cache_79_begin_0, end = k_cache_79_end_0, end_mask = k_cache_79_end_mask_0, squeeze_mask = k_cache_79_squeeze_mask_0, x = read_state_2)[name = string("k_cache_79_cast_fp16")]; tensor v_cache_79_begin_0 = const()[name = string("v_cache_79_begin_0"), val = tensor([19, 0, 0, 0])]; tensor v_cache_79_end_0 = const()[name = string("v_cache_79_end_0"), val = tensor([20, 1, 1500, 1280])]; tensor v_cache_79_end_mask_0 = const()[name = string("v_cache_79_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_79_squeeze_mask_0 = const()[name = string("v_cache_79_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_79_cast_fp16 = slice_by_index(begin = v_cache_79_begin_0, end = v_cache_79_end_0, end_mask = v_cache_79_end_mask_0, squeeze_mask = v_cache_79_squeeze_mask_0, x = read_state_3)[name = string("v_cache_79_cast_fp16")]; int32 var_4189 = const()[name = string("op_4189"), val = int32(-1)]; tensor var_4207_axes_0 = const()[name = string("op_4207_axes_0"), val = tensor([-1])]; tensor blocks_19_attn_ln_weight_to_fp16 = const()[name = string("blocks_19_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1010600768)))]; tensor blocks_19_attn_ln_bias_to_fp16 = const()[name = string("blocks_19_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1010603392)))]; fp16 var_4195_to_fp16 = const()[name = string("op_4195_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_4207_cast_fp16 = layer_norm(axes = var_4207_axes_0, beta = blocks_19_attn_ln_bias_to_fp16, epsilon = var_4195_to_fp16, gamma = blocks_19_attn_ln_weight_to_fp16, x = x_345_cast_fp16)[name = string("op_4207_cast_fp16")]; tensor var_4218_to_fp16 = const()[name = string("op_4218_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1010606016)))]; tensor var_4219_to_fp16 = const()[name = string("op_4219_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1013882880)))]; tensor linear_152_cast_fp16 = linear(bias = var_4219_to_fp16, weight = var_4218_to_fp16, x = var_4207_cast_fp16)[name = string("linear_152_cast_fp16")]; tensor var_4222_to_fp16 = const()[name = string("op_4222_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1013885504)))]; tensor linear_153_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_4222_to_fp16, x = var_4207_cast_fp16)[name = string("linear_153_cast_fp16")]; tensor var_4226_to_fp16 = const()[name = string("op_4226_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1017162368)))]; tensor var_4227_to_fp16 = const()[name = string("op_4227_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1020439232)))]; tensor linear_154_cast_fp16 = linear(bias = var_4227_to_fp16, weight = var_4226_to_fp16, x = var_4207_cast_fp16)[name = string("linear_154_cast_fp16")]; tensor var_4229_shape_cast_fp16 = shape(x = linear_152_cast_fp16)[name = string("op_4229_shape_cast_fp16")]; int32 gather_230_axis_0 = const()[name = string("gather_230_axis_0"), val = int32(0)]; int32 gather_230_batch_dims_0 = const()[name = string("gather_230_batch_dims_0"), val = int32(0)]; bool gather_230_validate_indices_0 = const()[name = string("gather_230_validate_indices_0"), val = bool(false)]; string var_4229_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4229_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_230_to_uint16 = const()[name = string("select_230_to_uint16"), val = uint16(1)]; tensor var_4229_shape_cast_fp16_to_uint16 = cast(dtype = var_4229_shape_cast_fp16_to_uint16_dtype_0, x = var_4229_shape_cast_fp16)[name = string("cast_352")]; uint16 gather_230_cast_uint16 = gather(axis = gather_230_axis_0, batch_dims = gather_230_batch_dims_0, indices = select_230_to_uint16, validate_indices = gather_230_validate_indices_0, x = var_4229_shape_cast_fp16_to_uint16)[name = string("gather_230_cast_uint16")]; string gather_230_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_230_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_230_cast_uint16_to_int32 = cast(dtype = gather_230_cast_uint16_to_int32_dtype_0, x = gather_230_cast_uint16)[name = string("cast_351")]; int32 end_step_41 = add(x = offset, y = gather_230_cast_uint16_to_int32)[name = string("end_step_41")]; tensor expand_dims_304 = const()[name = string("expand_dims_304"), val = tensor([0])]; tensor expand_dims_306 = const()[name = string("expand_dims_306"), val = tensor([0])]; tensor expand_dims_307_axes_0 = const()[name = string("expand_dims_307_axes_0"), val = tensor([0])]; tensor expand_dims_307 = expand_dims(axes = expand_dims_307_axes_0, x = end_step_41)[name = string("expand_dims_307")]; tensor concat_422_values0_0 = const()[name = string("concat_422_values0_0"), val = tensor([19])]; int32 concat_422_axis_0 = const()[name = string("concat_422_axis_0"), val = int32(0)]; bool concat_422_interleave_0 = const()[name = string("concat_422_interleave_0"), val = bool(false)]; tensor concat_422 = concat(axis = concat_422_axis_0, interleave = concat_422_interleave_0, values = (concat_422_values0_0, expand_dims_304, expand_dims_1, expand_dims_306))[name = string("concat_422")]; tensor concat_423_values0_0 = const()[name = string("concat_423_values0_0"), val = tensor([0])]; tensor concat_423_values1_0 = const()[name = string("concat_423_values1_0"), val = tensor([0])]; tensor concat_423_values3_0 = const()[name = string("concat_423_values3_0"), val = tensor([0])]; int32 concat_423_axis_0 = const()[name = string("concat_423_axis_0"), val = int32(0)]; bool concat_423_interleave_0 = const()[name = string("concat_423_interleave_0"), val = bool(false)]; tensor concat_423 = concat(axis = concat_423_axis_0, interleave = concat_423_interleave_0, values = (concat_423_values0_0, concat_423_values1_0, expand_dims_307, concat_423_values3_0))[name = string("concat_423")]; tensor k_cache1_internal_tensor_assign_20_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_20_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_20_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_422, begin_mask = k_cache1_internal_tensor_assign_20_begin_mask_0, end = concat_423, end_mask = k_cache1_internal_tensor_assign_20_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_20_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_20_stride_0, update = linear_153_cast_fp16, x = coreml_update_state_100)[name = string("k_cache1_internal_tensor_assign_20_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_20_cast_fp16, input = k_cache1)[name = string("coreml_update_state_102_write_state")]; tensor coreml_update_state_102 = read_state(input = k_cache1)[name = string("coreml_update_state_102")]; tensor v_cache1_internal_tensor_assign_20_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_20_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_20_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_422, begin_mask = v_cache1_internal_tensor_assign_20_begin_mask_0, end = concat_423, end_mask = v_cache1_internal_tensor_assign_20_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_20_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_20_stride_0, update = linear_154_cast_fp16, x = coreml_update_state_101)[name = string("v_cache1_internal_tensor_assign_20_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_20_cast_fp16, input = v_cache1)[name = string("coreml_update_state_103_write_state")]; tensor coreml_update_state_103 = read_state(input = v_cache1)[name = string("coreml_update_state_103")]; int32 concat_428_values0_0 = const()[name = string("concat_428_values0_0"), val = int32(1)]; int32 concat_428_values2_0 = const()[name = string("concat_428_values2_0"), val = int32(1280)]; int32 concat_428_axis_0 = const()[name = string("concat_428_axis_0"), val = int32(0)]; bool concat_428_interleave_0 = const()[name = string("concat_428_interleave_0"), val = bool(false)]; tensor concat_428 = concat(axis = concat_428_axis_0, interleave = concat_428_interleave_0, values = (concat_428_values0_0, end_step_41, concat_428_values2_0))[name = string("concat_428")]; tensor var_4245_begin_0 = const()[name = string("op_4245_begin_0"), val = tensor([0, 0, 0])]; tensor var_4245_end_mask_0 = const()[name = string("op_4245_end_mask_0"), val = tensor([true, false, true])]; tensor var_4245_cast_fp16 = slice_by_index(begin = var_4245_begin_0, end = concat_428, end_mask = var_4245_end_mask_0, x = k_cache_77_cast_fp16)[name = string("op_4245_cast_fp16")]; tensor var_4248_begin_0 = const()[name = string("op_4248_begin_0"), val = tensor([0, 0, 0])]; tensor var_4248_end_mask_0 = const()[name = string("op_4248_end_mask_0"), val = tensor([true, false, true])]; tensor var_4248_cast_fp16 = slice_by_index(begin = var_4248_begin_0, end = concat_428, end_mask = var_4248_end_mask_0, x = v_cache_77_cast_fp16)[name = string("op_4248_cast_fp16")]; tensor concat_430x = const()[name = string("concat_430x"), val = tensor([1, -1, 20, 64])]; tensor var_4258_cast_fp16 = reshape(shape = concat_430x, x = linear_152_cast_fp16)[name = string("op_4258_cast_fp16")]; tensor const_236_to_fp16 = const()[name = string("const_236_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_155_cast_fp16 = mul(x = var_4258_cast_fp16, y = const_236_to_fp16)[name = string("q_155_cast_fp16")]; tensor concat_431x = const()[name = string("concat_431x"), val = tensor([1, -1, 20, 64])]; tensor var_4265_cast_fp16 = reshape(shape = concat_431x, x = var_4245_cast_fp16)[name = string("op_4265_cast_fp16")]; tensor const_237_to_fp16 = const()[name = string("const_237_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_195_cast_fp16 = mul(x = var_4265_cast_fp16, y = const_237_to_fp16)[name = string("k_195_cast_fp16")]; tensor concat_432x = const()[name = string("concat_432x"), val = tensor([1, -1, 20, 64])]; tensor var_4272_cast_fp16 = reshape(shape = concat_432x, x = var_4248_cast_fp16)[name = string("op_4272_cast_fp16")]; tensor var_4273 = const()[name = string("op_4273"), val = tensor([0, 2, 1, 3])]; bool qk_115_transpose_x_0 = const()[name = string("qk_115_transpose_x_0"), val = bool(false)]; bool qk_115_transpose_y_0 = const()[name = string("qk_115_transpose_y_0"), val = bool(false)]; tensor transpose_333_perm_0 = const()[name = string("transpose_333_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_334_perm_0 = const()[name = string("transpose_334_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_334 = transpose(perm = transpose_334_perm_0, x = k_195_cast_fp16)[name = string("transpose_486")]; tensor transpose_333 = transpose(perm = transpose_333_perm_0, x = q_155_cast_fp16)[name = string("transpose_487")]; tensor qk_115_cast_fp16 = matmul(transpose_x = qk_115_transpose_x_0, transpose_y = qk_115_transpose_y_0, x = transpose_333, y = transpose_334)[name = string("qk_115_cast_fp16")]; int32 concat_433_values1_0 = const()[name = string("concat_433_values1_0"), val = int32(448)]; int32 concat_433_axis_0 = const()[name = string("concat_433_axis_0"), val = int32(0)]; bool concat_433_interleave_0 = const()[name = string("concat_433_interleave_0"), val = bool(false)]; tensor concat_433 = concat(axis = concat_433_axis_0, interleave = concat_433_interleave_0, values = (gather_230_cast_uint16_to_int32, concat_433_values1_0))[name = string("concat_433")]; tensor var_4276_begin_0 = const()[name = string("op_4276_begin_0"), val = tensor([0, 0])]; tensor var_4276_end_mask_0 = const()[name = string("op_4276_end_mask_0"), val = tensor([false, true])]; tensor var_4276_cast_fp16 = slice_by_index(begin = var_4276_begin_0, end = concat_433, end_mask = var_4276_end_mask_0, x = mask_to_fp16)[name = string("op_4276_cast_fp16")]; int32 concat_434_values0_0 = const()[name = string("concat_434_values0_0"), val = int32(0)]; int32 concat_434_axis_0 = const()[name = string("concat_434_axis_0"), val = int32(0)]; bool concat_434_interleave_0 = const()[name = string("concat_434_interleave_0"), val = bool(false)]; tensor concat_434 = concat(axis = concat_434_axis_0, interleave = concat_434_interleave_0, values = (concat_434_values0_0, gather_230_cast_uint16_to_int32))[name = string("concat_434")]; tensor var_4277_begin_0 = const()[name = string("op_4277_begin_0"), val = tensor([0, 0])]; tensor var_4277_end_mask_0 = const()[name = string("op_4277_end_mask_0"), val = tensor([true, false])]; tensor var_4277_cast_fp16 = slice_by_index(begin = var_4277_begin_0, end = concat_434, end_mask = var_4277_end_mask_0, x = var_4276_cast_fp16)[name = string("op_4277_cast_fp16")]; tensor qk_117_cast_fp16 = add(x = qk_115_cast_fp16, y = var_4277_cast_fp16)[name = string("qk_117_cast_fp16")]; tensor var_4280_cast_fp16 = softmax(axis = var_4189, x = qk_117_cast_fp16)[name = string("op_4280_cast_fp16")]; bool var_4282_transpose_x_0 = const()[name = string("op_4282_transpose_x_0"), val = bool(false)]; bool var_4282_transpose_y_0 = const()[name = string("op_4282_transpose_y_0"), val = bool(false)]; tensor v_195_cast_fp16 = transpose(perm = var_4273, x = var_4272_cast_fp16)[name = string("transpose_488")]; tensor var_4282_cast_fp16 = matmul(transpose_x = var_4282_transpose_x_0, transpose_y = var_4282_transpose_y_0, x = var_4280_cast_fp16, y = v_195_cast_fp16)[name = string("op_4282_cast_fp16")]; tensor var_4283 = const()[name = string("op_4283"), val = tensor([0, 2, 1, 3])]; tensor concat_435x = const()[name = string("concat_435x"), val = tensor([1, -1, 1280])]; tensor var_4284_cast_fp16 = transpose(perm = var_4283, x = var_4282_cast_fp16)[name = string("transpose_485")]; tensor x_349_cast_fp16 = reshape(shape = concat_435x, x = var_4284_cast_fp16)[name = string("x_349_cast_fp16")]; tensor var_4288_to_fp16 = const()[name = string("op_4288_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1020441856)))]; tensor var_4289_to_fp16 = const()[name = string("op_4289_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1023718720)))]; tensor linear_155_cast_fp16 = linear(bias = var_4289_to_fp16, weight = var_4288_to_fp16, x = x_349_cast_fp16)[name = string("linear_155_cast_fp16")]; tensor x_351_cast_fp16 = add(x = x_345_cast_fp16, y = linear_155_cast_fp16)[name = string("x_351_cast_fp16")]; tensor var_4296_axes_0 = const()[name = string("op_4296_axes_0"), val = tensor([-1])]; tensor blocks_19_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_19_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1023721344)))]; tensor blocks_19_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_19_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1023723968)))]; tensor var_4296_cast_fp16 = layer_norm(axes = var_4296_axes_0, beta = blocks_19_cross_attn_ln_bias_to_fp16, epsilon = var_4195_to_fp16, gamma = blocks_19_cross_attn_ln_weight_to_fp16, x = x_351_cast_fp16)[name = string("op_4296_cast_fp16")]; tensor var_4305_to_fp16 = const()[name = string("op_4305_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1023726592)))]; tensor var_4306_to_fp16 = const()[name = string("op_4306_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1027003456)))]; tensor linear_156_cast_fp16 = linear(bias = var_4306_to_fp16, weight = var_4305_to_fp16, x = var_4296_cast_fp16)[name = string("linear_156_cast_fp16")]; tensor concat_436 = const()[name = string("concat_436"), val = tensor([0, 0, 0])]; tensor concat_437 = const()[name = string("concat_437"), val = tensor([0, 1500, 0])]; tensor k_197_internal_tensor_assign_1_stride_0 = const()[name = string("k_197_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_197_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_197_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_197_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_197_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_197_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_197_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_197_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_436, begin_mask = k_197_internal_tensor_assign_1_begin_mask_0, end = concat_437, end_mask = k_197_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_197_internal_tensor_assign_1_squeeze_mask_0, stride = k_197_internal_tensor_assign_1_stride_0, update = k_cache_79_cast_fp16, x = k_7_to_fp16)[name = string("k_197_internal_tensor_assign_1_cast_fp16")]; tensor concat_438 = const()[name = string("concat_438"), val = tensor([0, 0, 0])]; tensor concat_439 = const()[name = string("concat_439"), val = tensor([0, 1500, 0])]; tensor v_197_internal_tensor_assign_1_stride_0 = const()[name = string("v_197_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_197_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_197_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_197_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_197_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_197_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_197_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_197_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_438, begin_mask = v_197_internal_tensor_assign_1_begin_mask_0, end = concat_439, end_mask = v_197_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_197_internal_tensor_assign_1_squeeze_mask_0, stride = v_197_internal_tensor_assign_1_stride_0, update = v_cache_79_cast_fp16, x = k_7_to_fp16)[name = string("v_197_internal_tensor_assign_1_cast_fp16")]; tensor concat_440x = const()[name = string("concat_440x"), val = tensor([1, -1, 20, 64])]; tensor var_4326_cast_fp16 = reshape(shape = concat_440x, x = linear_156_cast_fp16)[name = string("op_4326_cast_fp16")]; tensor const_238_to_fp16 = const()[name = string("const_238_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_159_cast_fp16 = mul(x = var_4326_cast_fp16, y = const_238_to_fp16)[name = string("q_159_cast_fp16")]; tensor var_4332 = const()[name = string("op_4332"), val = tensor([1, 1500, 20, -1])]; tensor var_4333_cast_fp16 = reshape(shape = var_4332, x = k_197_internal_tensor_assign_1_cast_fp16)[name = string("op_4333_cast_fp16")]; tensor const_239_to_fp16 = const()[name = string("const_239_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_199_cast_fp16 = mul(x = var_4333_cast_fp16, y = const_239_to_fp16)[name = string("k_199_cast_fp16")]; tensor var_4339 = const()[name = string("op_4339"), val = tensor([1, 1500, 20, -1])]; tensor var_4340_cast_fp16 = reshape(shape = var_4339, x = v_197_internal_tensor_assign_1_cast_fp16)[name = string("op_4340_cast_fp16")]; tensor var_4341 = const()[name = string("op_4341"), val = tensor([0, 2, 1, 3])]; bool qk_119_transpose_x_0 = const()[name = string("qk_119_transpose_x_0"), val = bool(false)]; bool qk_119_transpose_y_0 = const()[name = string("qk_119_transpose_y_0"), val = bool(false)]; tensor transpose_335_perm_0 = const()[name = string("transpose_335_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_336_perm_0 = const()[name = string("transpose_336_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_336 = transpose(perm = transpose_336_perm_0, x = k_199_cast_fp16)[name = string("transpose_482")]; tensor transpose_335 = transpose(perm = transpose_335_perm_0, x = q_159_cast_fp16)[name = string("transpose_483")]; tensor qk_119_cast_fp16 = matmul(transpose_x = qk_119_transpose_x_0, transpose_y = qk_119_transpose_y_0, x = transpose_335, y = transpose_336)[name = string("qk_119_cast_fp16")]; tensor var_4345_cast_fp16 = softmax(axis = var_4189, x = qk_119_cast_fp16)[name = string("op_4345_cast_fp16")]; bool var_4347_transpose_x_0 = const()[name = string("op_4347_transpose_x_0"), val = bool(false)]; bool var_4347_transpose_y_0 = const()[name = string("op_4347_transpose_y_0"), val = bool(false)]; tensor v_199_cast_fp16 = transpose(perm = var_4341, x = var_4340_cast_fp16)[name = string("transpose_484")]; tensor var_4347_cast_fp16 = matmul(transpose_x = var_4347_transpose_x_0, transpose_y = var_4347_transpose_y_0, x = var_4345_cast_fp16, y = v_199_cast_fp16)[name = string("op_4347_cast_fp16")]; tensor var_4348 = const()[name = string("op_4348"), val = tensor([0, 2, 1, 3])]; tensor concat_441x = const()[name = string("concat_441x"), val = tensor([1, -1, 1280])]; tensor var_4349_cast_fp16 = transpose(perm = var_4348, x = var_4347_cast_fp16)[name = string("transpose_481")]; tensor x_355_cast_fp16 = reshape(shape = concat_441x, x = var_4349_cast_fp16)[name = string("x_355_cast_fp16")]; tensor var_4353_to_fp16 = const()[name = string("op_4353_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1027006080)))]; tensor var_4354_to_fp16 = const()[name = string("op_4354_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1030282944)))]; tensor linear_157_cast_fp16 = linear(bias = var_4354_to_fp16, weight = var_4353_to_fp16, x = x_355_cast_fp16)[name = string("linear_157_cast_fp16")]; tensor x_357_cast_fp16 = add(x = x_351_cast_fp16, y = linear_157_cast_fp16)[name = string("x_357_cast_fp16")]; tensor var_4361_axes_0 = const()[name = string("op_4361_axes_0"), val = tensor([-1])]; tensor blocks_19_mlp_ln_weight_to_fp16 = const()[name = string("blocks_19_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1030285568)))]; tensor blocks_19_mlp_ln_bias_to_fp16 = const()[name = string("blocks_19_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1030288192)))]; tensor var_4361_cast_fp16 = layer_norm(axes = var_4361_axes_0, beta = blocks_19_mlp_ln_bias_to_fp16, epsilon = var_4195_to_fp16, gamma = blocks_19_mlp_ln_weight_to_fp16, x = x_357_cast_fp16)[name = string("op_4361_cast_fp16")]; tensor var_4370_to_fp16 = const()[name = string("op_4370_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1030290816)))]; tensor var_4371_to_fp16 = const()[name = string("op_4371_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1043398080)))]; tensor linear_158_cast_fp16 = linear(bias = var_4371_to_fp16, weight = var_4370_to_fp16, x = var_4361_cast_fp16)[name = string("linear_158_cast_fp16")]; string x_361_mode_0 = const()[name = string("x_361_mode_0"), val = string("EXACT")]; tensor x_361_cast_fp16 = gelu(mode = x_361_mode_0, x = linear_158_cast_fp16)[name = string("x_361_cast_fp16")]; tensor var_4376_to_fp16 = const()[name = string("op_4376_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1043408384)))]; tensor var_4377_to_fp16 = const()[name = string("op_4377_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1056515648)))]; tensor linear_159_cast_fp16 = linear(bias = var_4377_to_fp16, weight = var_4376_to_fp16, x = x_361_cast_fp16)[name = string("linear_159_cast_fp16")]; tensor x_363_cast_fp16 = add(x = x_357_cast_fp16, y = linear_159_cast_fp16)[name = string("x_363_cast_fp16")]; tensor k_cache_81_begin_0 = const()[name = string("k_cache_81_begin_0"), val = tensor([20, 0, 0, 0])]; tensor k_cache_81_end_0 = const()[name = string("k_cache_81_end_0"), val = tensor([21, 1, 448, 1280])]; tensor k_cache_81_end_mask_0 = const()[name = string("k_cache_81_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_81_squeeze_mask_0 = const()[name = string("k_cache_81_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_81_cast_fp16 = slice_by_index(begin = k_cache_81_begin_0, end = k_cache_81_end_0, end_mask = k_cache_81_end_mask_0, squeeze_mask = k_cache_81_squeeze_mask_0, x = coreml_update_state_102)[name = string("k_cache_81_cast_fp16")]; tensor v_cache_81_begin_0 = const()[name = string("v_cache_81_begin_0"), val = tensor([20, 0, 0, 0])]; tensor v_cache_81_end_0 = const()[name = string("v_cache_81_end_0"), val = tensor([21, 1, 448, 1280])]; tensor v_cache_81_end_mask_0 = const()[name = string("v_cache_81_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_81_squeeze_mask_0 = const()[name = string("v_cache_81_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_81_cast_fp16 = slice_by_index(begin = v_cache_81_begin_0, end = v_cache_81_end_0, end_mask = v_cache_81_end_mask_0, squeeze_mask = v_cache_81_squeeze_mask_0, x = coreml_update_state_103)[name = string("v_cache_81_cast_fp16")]; tensor k_cache_83_begin_0 = const()[name = string("k_cache_83_begin_0"), val = tensor([20, 0, 0, 0])]; tensor k_cache_83_end_0 = const()[name = string("k_cache_83_end_0"), val = tensor([21, 1, 1500, 1280])]; tensor k_cache_83_end_mask_0 = const()[name = string("k_cache_83_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_83_squeeze_mask_0 = const()[name = string("k_cache_83_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_83_cast_fp16 = slice_by_index(begin = k_cache_83_begin_0, end = k_cache_83_end_0, end_mask = k_cache_83_end_mask_0, squeeze_mask = k_cache_83_squeeze_mask_0, x = read_state_2)[name = string("k_cache_83_cast_fp16")]; tensor v_cache_83_begin_0 = const()[name = string("v_cache_83_begin_0"), val = tensor([20, 0, 0, 0])]; tensor v_cache_83_end_0 = const()[name = string("v_cache_83_end_0"), val = tensor([21, 1, 1500, 1280])]; tensor v_cache_83_end_mask_0 = const()[name = string("v_cache_83_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_83_squeeze_mask_0 = const()[name = string("v_cache_83_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_83_cast_fp16 = slice_by_index(begin = v_cache_83_begin_0, end = v_cache_83_end_0, end_mask = v_cache_83_end_mask_0, squeeze_mask = v_cache_83_squeeze_mask_0, x = read_state_3)[name = string("v_cache_83_cast_fp16")]; int32 var_4400 = const()[name = string("op_4400"), val = int32(-1)]; tensor var_4418_axes_0 = const()[name = string("op_4418_axes_0"), val = tensor([-1])]; tensor blocks_20_attn_ln_weight_to_fp16 = const()[name = string("blocks_20_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1056518272)))]; tensor blocks_20_attn_ln_bias_to_fp16 = const()[name = string("blocks_20_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1056520896)))]; fp16 var_4406_to_fp16 = const()[name = string("op_4406_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_4418_cast_fp16 = layer_norm(axes = var_4418_axes_0, beta = blocks_20_attn_ln_bias_to_fp16, epsilon = var_4406_to_fp16, gamma = blocks_20_attn_ln_weight_to_fp16, x = x_363_cast_fp16)[name = string("op_4418_cast_fp16")]; tensor var_4429_to_fp16 = const()[name = string("op_4429_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1056523520)))]; tensor var_4430_to_fp16 = const()[name = string("op_4430_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1059800384)))]; tensor linear_160_cast_fp16 = linear(bias = var_4430_to_fp16, weight = var_4429_to_fp16, x = var_4418_cast_fp16)[name = string("linear_160_cast_fp16")]; tensor var_4433_to_fp16 = const()[name = string("op_4433_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1059803008)))]; tensor linear_161_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_4433_to_fp16, x = var_4418_cast_fp16)[name = string("linear_161_cast_fp16")]; tensor var_4437_to_fp16 = const()[name = string("op_4437_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1063079872)))]; tensor var_4438_to_fp16 = const()[name = string("op_4438_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1066356736)))]; tensor linear_162_cast_fp16 = linear(bias = var_4438_to_fp16, weight = var_4437_to_fp16, x = var_4418_cast_fp16)[name = string("linear_162_cast_fp16")]; tensor var_4440_shape_cast_fp16 = shape(x = linear_160_cast_fp16)[name = string("op_4440_shape_cast_fp16")]; int32 gather_242_axis_0 = const()[name = string("gather_242_axis_0"), val = int32(0)]; int32 gather_242_batch_dims_0 = const()[name = string("gather_242_batch_dims_0"), val = int32(0)]; bool gather_242_validate_indices_0 = const()[name = string("gather_242_validate_indices_0"), val = bool(false)]; string var_4440_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4440_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_242_to_uint16 = const()[name = string("select_242_to_uint16"), val = uint16(1)]; tensor var_4440_shape_cast_fp16_to_uint16 = cast(dtype = var_4440_shape_cast_fp16_to_uint16_dtype_0, x = var_4440_shape_cast_fp16)[name = string("cast_350")]; uint16 gather_242_cast_uint16 = gather(axis = gather_242_axis_0, batch_dims = gather_242_batch_dims_0, indices = select_242_to_uint16, validate_indices = gather_242_validate_indices_0, x = var_4440_shape_cast_fp16_to_uint16)[name = string("gather_242_cast_uint16")]; string gather_242_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_242_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_242_cast_uint16_to_int32 = cast(dtype = gather_242_cast_uint16_to_int32_dtype_0, x = gather_242_cast_uint16)[name = string("cast_349")]; int32 end_step_43 = add(x = offset, y = gather_242_cast_uint16_to_int32)[name = string("end_step_43")]; tensor expand_dims_320 = const()[name = string("expand_dims_320"), val = tensor([0])]; tensor expand_dims_322 = const()[name = string("expand_dims_322"), val = tensor([0])]; tensor expand_dims_323_axes_0 = const()[name = string("expand_dims_323_axes_0"), val = tensor([0])]; tensor expand_dims_323 = expand_dims(axes = expand_dims_323_axes_0, x = end_step_43)[name = string("expand_dims_323")]; tensor concat_444_values0_0 = const()[name = string("concat_444_values0_0"), val = tensor([20])]; int32 concat_444_axis_0 = const()[name = string("concat_444_axis_0"), val = int32(0)]; bool concat_444_interleave_0 = const()[name = string("concat_444_interleave_0"), val = bool(false)]; tensor concat_444 = concat(axis = concat_444_axis_0, interleave = concat_444_interleave_0, values = (concat_444_values0_0, expand_dims_320, expand_dims_1, expand_dims_322))[name = string("concat_444")]; tensor concat_445_values0_0 = const()[name = string("concat_445_values0_0"), val = tensor([0])]; tensor concat_445_values1_0 = const()[name = string("concat_445_values1_0"), val = tensor([0])]; tensor concat_445_values3_0 = const()[name = string("concat_445_values3_0"), val = tensor([0])]; int32 concat_445_axis_0 = const()[name = string("concat_445_axis_0"), val = int32(0)]; bool concat_445_interleave_0 = const()[name = string("concat_445_interleave_0"), val = bool(false)]; tensor concat_445 = concat(axis = concat_445_axis_0, interleave = concat_445_interleave_0, values = (concat_445_values0_0, concat_445_values1_0, expand_dims_323, concat_445_values3_0))[name = string("concat_445")]; tensor k_cache1_internal_tensor_assign_21_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_21_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_21_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_444, begin_mask = k_cache1_internal_tensor_assign_21_begin_mask_0, end = concat_445, end_mask = k_cache1_internal_tensor_assign_21_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_21_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_21_stride_0, update = linear_161_cast_fp16, x = coreml_update_state_102)[name = string("k_cache1_internal_tensor_assign_21_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_21_cast_fp16, input = k_cache1)[name = string("coreml_update_state_104_write_state")]; tensor coreml_update_state_104 = read_state(input = k_cache1)[name = string("coreml_update_state_104")]; tensor v_cache1_internal_tensor_assign_21_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_21_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_21_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_444, begin_mask = v_cache1_internal_tensor_assign_21_begin_mask_0, end = concat_445, end_mask = v_cache1_internal_tensor_assign_21_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_21_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_21_stride_0, update = linear_162_cast_fp16, x = coreml_update_state_103)[name = string("v_cache1_internal_tensor_assign_21_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_21_cast_fp16, input = v_cache1)[name = string("coreml_update_state_105_write_state")]; tensor coreml_update_state_105 = read_state(input = v_cache1)[name = string("coreml_update_state_105")]; int32 concat_450_values0_0 = const()[name = string("concat_450_values0_0"), val = int32(1)]; int32 concat_450_values2_0 = const()[name = string("concat_450_values2_0"), val = int32(1280)]; int32 concat_450_axis_0 = const()[name = string("concat_450_axis_0"), val = int32(0)]; bool concat_450_interleave_0 = const()[name = string("concat_450_interleave_0"), val = bool(false)]; tensor concat_450 = concat(axis = concat_450_axis_0, interleave = concat_450_interleave_0, values = (concat_450_values0_0, end_step_43, concat_450_values2_0))[name = string("concat_450")]; tensor var_4456_begin_0 = const()[name = string("op_4456_begin_0"), val = tensor([0, 0, 0])]; tensor var_4456_end_mask_0 = const()[name = string("op_4456_end_mask_0"), val = tensor([true, false, true])]; tensor var_4456_cast_fp16 = slice_by_index(begin = var_4456_begin_0, end = concat_450, end_mask = var_4456_end_mask_0, x = k_cache_81_cast_fp16)[name = string("op_4456_cast_fp16")]; tensor var_4459_begin_0 = const()[name = string("op_4459_begin_0"), val = tensor([0, 0, 0])]; tensor var_4459_end_mask_0 = const()[name = string("op_4459_end_mask_0"), val = tensor([true, false, true])]; tensor var_4459_cast_fp16 = slice_by_index(begin = var_4459_begin_0, end = concat_450, end_mask = var_4459_end_mask_0, x = v_cache_81_cast_fp16)[name = string("op_4459_cast_fp16")]; tensor concat_452x = const()[name = string("concat_452x"), val = tensor([1, -1, 20, 64])]; tensor var_4469_cast_fp16 = reshape(shape = concat_452x, x = linear_160_cast_fp16)[name = string("op_4469_cast_fp16")]; tensor const_240_to_fp16 = const()[name = string("const_240_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_163_cast_fp16 = mul(x = var_4469_cast_fp16, y = const_240_to_fp16)[name = string("q_163_cast_fp16")]; tensor concat_453x = const()[name = string("concat_453x"), val = tensor([1, -1, 20, 64])]; tensor var_4476_cast_fp16 = reshape(shape = concat_453x, x = var_4456_cast_fp16)[name = string("op_4476_cast_fp16")]; tensor const_241_to_fp16 = const()[name = string("const_241_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_205_cast_fp16 = mul(x = var_4476_cast_fp16, y = const_241_to_fp16)[name = string("k_205_cast_fp16")]; tensor concat_454x = const()[name = string("concat_454x"), val = tensor([1, -1, 20, 64])]; tensor var_4483_cast_fp16 = reshape(shape = concat_454x, x = var_4459_cast_fp16)[name = string("op_4483_cast_fp16")]; tensor var_4484 = const()[name = string("op_4484"), val = tensor([0, 2, 1, 3])]; bool qk_121_transpose_x_0 = const()[name = string("qk_121_transpose_x_0"), val = bool(false)]; bool qk_121_transpose_y_0 = const()[name = string("qk_121_transpose_y_0"), val = bool(false)]; tensor transpose_337_perm_0 = const()[name = string("transpose_337_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_338_perm_0 = const()[name = string("transpose_338_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_338 = transpose(perm = transpose_338_perm_0, x = k_205_cast_fp16)[name = string("transpose_478")]; tensor transpose_337 = transpose(perm = transpose_337_perm_0, x = q_163_cast_fp16)[name = string("transpose_479")]; tensor qk_121_cast_fp16 = matmul(transpose_x = qk_121_transpose_x_0, transpose_y = qk_121_transpose_y_0, x = transpose_337, y = transpose_338)[name = string("qk_121_cast_fp16")]; int32 concat_455_values1_0 = const()[name = string("concat_455_values1_0"), val = int32(448)]; int32 concat_455_axis_0 = const()[name = string("concat_455_axis_0"), val = int32(0)]; bool concat_455_interleave_0 = const()[name = string("concat_455_interleave_0"), val = bool(false)]; tensor concat_455 = concat(axis = concat_455_axis_0, interleave = concat_455_interleave_0, values = (gather_242_cast_uint16_to_int32, concat_455_values1_0))[name = string("concat_455")]; tensor var_4487_begin_0 = const()[name = string("op_4487_begin_0"), val = tensor([0, 0])]; tensor var_4487_end_mask_0 = const()[name = string("op_4487_end_mask_0"), val = tensor([false, true])]; tensor var_4487_cast_fp16 = slice_by_index(begin = var_4487_begin_0, end = concat_455, end_mask = var_4487_end_mask_0, x = mask_to_fp16)[name = string("op_4487_cast_fp16")]; int32 concat_456_values0_0 = const()[name = string("concat_456_values0_0"), val = int32(0)]; int32 concat_456_axis_0 = const()[name = string("concat_456_axis_0"), val = int32(0)]; bool concat_456_interleave_0 = const()[name = string("concat_456_interleave_0"), val = bool(false)]; tensor concat_456 = concat(axis = concat_456_axis_0, interleave = concat_456_interleave_0, values = (concat_456_values0_0, gather_242_cast_uint16_to_int32))[name = string("concat_456")]; tensor var_4488_begin_0 = const()[name = string("op_4488_begin_0"), val = tensor([0, 0])]; tensor var_4488_end_mask_0 = const()[name = string("op_4488_end_mask_0"), val = tensor([true, false])]; tensor var_4488_cast_fp16 = slice_by_index(begin = var_4488_begin_0, end = concat_456, end_mask = var_4488_end_mask_0, x = var_4487_cast_fp16)[name = string("op_4488_cast_fp16")]; tensor qk_123_cast_fp16 = add(x = qk_121_cast_fp16, y = var_4488_cast_fp16)[name = string("qk_123_cast_fp16")]; tensor var_4491_cast_fp16 = softmax(axis = var_4400, x = qk_123_cast_fp16)[name = string("op_4491_cast_fp16")]; bool var_4493_transpose_x_0 = const()[name = string("op_4493_transpose_x_0"), val = bool(false)]; bool var_4493_transpose_y_0 = const()[name = string("op_4493_transpose_y_0"), val = bool(false)]; tensor v_205_cast_fp16 = transpose(perm = var_4484, x = var_4483_cast_fp16)[name = string("transpose_480")]; tensor var_4493_cast_fp16 = matmul(transpose_x = var_4493_transpose_x_0, transpose_y = var_4493_transpose_y_0, x = var_4491_cast_fp16, y = v_205_cast_fp16)[name = string("op_4493_cast_fp16")]; tensor var_4494 = const()[name = string("op_4494"), val = tensor([0, 2, 1, 3])]; tensor concat_457x = const()[name = string("concat_457x"), val = tensor([1, -1, 1280])]; tensor var_4495_cast_fp16 = transpose(perm = var_4494, x = var_4493_cast_fp16)[name = string("transpose_477")]; tensor x_367_cast_fp16 = reshape(shape = concat_457x, x = var_4495_cast_fp16)[name = string("x_367_cast_fp16")]; tensor var_4499_to_fp16 = const()[name = string("op_4499_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1066359360)))]; tensor var_4500_to_fp16 = const()[name = string("op_4500_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1069636224)))]; tensor linear_163_cast_fp16 = linear(bias = var_4500_to_fp16, weight = var_4499_to_fp16, x = x_367_cast_fp16)[name = string("linear_163_cast_fp16")]; tensor x_369_cast_fp16 = add(x = x_363_cast_fp16, y = linear_163_cast_fp16)[name = string("x_369_cast_fp16")]; tensor var_4507_axes_0 = const()[name = string("op_4507_axes_0"), val = tensor([-1])]; tensor blocks_20_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_20_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1069638848)))]; tensor blocks_20_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_20_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1069641472)))]; tensor var_4507_cast_fp16 = layer_norm(axes = var_4507_axes_0, beta = blocks_20_cross_attn_ln_bias_to_fp16, epsilon = var_4406_to_fp16, gamma = blocks_20_cross_attn_ln_weight_to_fp16, x = x_369_cast_fp16)[name = string("op_4507_cast_fp16")]; tensor var_4516_to_fp16 = const()[name = string("op_4516_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1069644096)))]; tensor var_4517_to_fp16 = const()[name = string("op_4517_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1072920960)))]; tensor linear_164_cast_fp16 = linear(bias = var_4517_to_fp16, weight = var_4516_to_fp16, x = var_4507_cast_fp16)[name = string("linear_164_cast_fp16")]; tensor concat_458 = const()[name = string("concat_458"), val = tensor([0, 0, 0])]; tensor concat_459 = const()[name = string("concat_459"), val = tensor([0, 1500, 0])]; tensor k_207_internal_tensor_assign_1_stride_0 = const()[name = string("k_207_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_207_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_207_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_207_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_207_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_207_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_207_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_207_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_458, begin_mask = k_207_internal_tensor_assign_1_begin_mask_0, end = concat_459, end_mask = k_207_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_207_internal_tensor_assign_1_squeeze_mask_0, stride = k_207_internal_tensor_assign_1_stride_0, update = k_cache_83_cast_fp16, x = k_7_to_fp16)[name = string("k_207_internal_tensor_assign_1_cast_fp16")]; tensor concat_460 = const()[name = string("concat_460"), val = tensor([0, 0, 0])]; tensor concat_461 = const()[name = string("concat_461"), val = tensor([0, 1500, 0])]; tensor v_207_internal_tensor_assign_1_stride_0 = const()[name = string("v_207_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_207_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_207_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_207_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_207_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_207_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_207_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_207_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_460, begin_mask = v_207_internal_tensor_assign_1_begin_mask_0, end = concat_461, end_mask = v_207_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_207_internal_tensor_assign_1_squeeze_mask_0, stride = v_207_internal_tensor_assign_1_stride_0, update = v_cache_83_cast_fp16, x = k_7_to_fp16)[name = string("v_207_internal_tensor_assign_1_cast_fp16")]; tensor concat_462x = const()[name = string("concat_462x"), val = tensor([1, -1, 20, 64])]; tensor var_4537_cast_fp16 = reshape(shape = concat_462x, x = linear_164_cast_fp16)[name = string("op_4537_cast_fp16")]; tensor const_242_to_fp16 = const()[name = string("const_242_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_167_cast_fp16 = mul(x = var_4537_cast_fp16, y = const_242_to_fp16)[name = string("q_167_cast_fp16")]; tensor var_4543 = const()[name = string("op_4543"), val = tensor([1, 1500, 20, -1])]; tensor var_4544_cast_fp16 = reshape(shape = var_4543, x = k_207_internal_tensor_assign_1_cast_fp16)[name = string("op_4544_cast_fp16")]; tensor const_243_to_fp16 = const()[name = string("const_243_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_209_cast_fp16 = mul(x = var_4544_cast_fp16, y = const_243_to_fp16)[name = string("k_209_cast_fp16")]; tensor var_4550 = const()[name = string("op_4550"), val = tensor([1, 1500, 20, -1])]; tensor var_4551_cast_fp16 = reshape(shape = var_4550, x = v_207_internal_tensor_assign_1_cast_fp16)[name = string("op_4551_cast_fp16")]; tensor var_4552 = const()[name = string("op_4552"), val = tensor([0, 2, 1, 3])]; bool qk_125_transpose_x_0 = const()[name = string("qk_125_transpose_x_0"), val = bool(false)]; bool qk_125_transpose_y_0 = const()[name = string("qk_125_transpose_y_0"), val = bool(false)]; tensor transpose_339_perm_0 = const()[name = string("transpose_339_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_340_perm_0 = const()[name = string("transpose_340_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_340 = transpose(perm = transpose_340_perm_0, x = k_209_cast_fp16)[name = string("transpose_474")]; tensor transpose_339 = transpose(perm = transpose_339_perm_0, x = q_167_cast_fp16)[name = string("transpose_475")]; tensor qk_125_cast_fp16 = matmul(transpose_x = qk_125_transpose_x_0, transpose_y = qk_125_transpose_y_0, x = transpose_339, y = transpose_340)[name = string("qk_125_cast_fp16")]; tensor var_4556_cast_fp16 = softmax(axis = var_4400, x = qk_125_cast_fp16)[name = string("op_4556_cast_fp16")]; bool var_4558_transpose_x_0 = const()[name = string("op_4558_transpose_x_0"), val = bool(false)]; bool var_4558_transpose_y_0 = const()[name = string("op_4558_transpose_y_0"), val = bool(false)]; tensor v_209_cast_fp16 = transpose(perm = var_4552, x = var_4551_cast_fp16)[name = string("transpose_476")]; tensor var_4558_cast_fp16 = matmul(transpose_x = var_4558_transpose_x_0, transpose_y = var_4558_transpose_y_0, x = var_4556_cast_fp16, y = v_209_cast_fp16)[name = string("op_4558_cast_fp16")]; tensor var_4559 = const()[name = string("op_4559"), val = tensor([0, 2, 1, 3])]; tensor concat_463x = const()[name = string("concat_463x"), val = tensor([1, -1, 1280])]; tensor var_4560_cast_fp16 = transpose(perm = var_4559, x = var_4558_cast_fp16)[name = string("transpose_473")]; tensor x_373_cast_fp16 = reshape(shape = concat_463x, x = var_4560_cast_fp16)[name = string("x_373_cast_fp16")]; tensor var_4564_to_fp16 = const()[name = string("op_4564_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1072923584)))]; tensor var_4565_to_fp16 = const()[name = string("op_4565_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1076200448)))]; tensor linear_165_cast_fp16 = linear(bias = var_4565_to_fp16, weight = var_4564_to_fp16, x = x_373_cast_fp16)[name = string("linear_165_cast_fp16")]; tensor x_375_cast_fp16 = add(x = x_369_cast_fp16, y = linear_165_cast_fp16)[name = string("x_375_cast_fp16")]; tensor var_4572_axes_0 = const()[name = string("op_4572_axes_0"), val = tensor([-1])]; tensor blocks_20_mlp_ln_weight_to_fp16 = const()[name = string("blocks_20_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1076203072)))]; tensor blocks_20_mlp_ln_bias_to_fp16 = const()[name = string("blocks_20_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1076205696)))]; tensor var_4572_cast_fp16 = layer_norm(axes = var_4572_axes_0, beta = blocks_20_mlp_ln_bias_to_fp16, epsilon = var_4406_to_fp16, gamma = blocks_20_mlp_ln_weight_to_fp16, x = x_375_cast_fp16)[name = string("op_4572_cast_fp16")]; tensor var_4581_to_fp16 = const()[name = string("op_4581_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1076208320)))]; tensor var_4582_to_fp16 = const()[name = string("op_4582_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1089315584)))]; tensor linear_166_cast_fp16 = linear(bias = var_4582_to_fp16, weight = var_4581_to_fp16, x = var_4572_cast_fp16)[name = string("linear_166_cast_fp16")]; string x_379_mode_0 = const()[name = string("x_379_mode_0"), val = string("EXACT")]; tensor x_379_cast_fp16 = gelu(mode = x_379_mode_0, x = linear_166_cast_fp16)[name = string("x_379_cast_fp16")]; tensor var_4587_to_fp16 = const()[name = string("op_4587_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1089325888)))]; tensor var_4588_to_fp16 = const()[name = string("op_4588_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1102433152)))]; tensor linear_167_cast_fp16 = linear(bias = var_4588_to_fp16, weight = var_4587_to_fp16, x = x_379_cast_fp16)[name = string("linear_167_cast_fp16")]; tensor x_381_cast_fp16 = add(x = x_375_cast_fp16, y = linear_167_cast_fp16)[name = string("x_381_cast_fp16")]; tensor k_cache_85_begin_0 = const()[name = string("k_cache_85_begin_0"), val = tensor([21, 0, 0, 0])]; tensor k_cache_85_end_0 = const()[name = string("k_cache_85_end_0"), val = tensor([22, 1, 448, 1280])]; tensor k_cache_85_end_mask_0 = const()[name = string("k_cache_85_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_85_squeeze_mask_0 = const()[name = string("k_cache_85_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_85_cast_fp16 = slice_by_index(begin = k_cache_85_begin_0, end = k_cache_85_end_0, end_mask = k_cache_85_end_mask_0, squeeze_mask = k_cache_85_squeeze_mask_0, x = coreml_update_state_104)[name = string("k_cache_85_cast_fp16")]; tensor v_cache_85_begin_0 = const()[name = string("v_cache_85_begin_0"), val = tensor([21, 0, 0, 0])]; tensor v_cache_85_end_0 = const()[name = string("v_cache_85_end_0"), val = tensor([22, 1, 448, 1280])]; tensor v_cache_85_end_mask_0 = const()[name = string("v_cache_85_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_85_squeeze_mask_0 = const()[name = string("v_cache_85_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_85_cast_fp16 = slice_by_index(begin = v_cache_85_begin_0, end = v_cache_85_end_0, end_mask = v_cache_85_end_mask_0, squeeze_mask = v_cache_85_squeeze_mask_0, x = coreml_update_state_105)[name = string("v_cache_85_cast_fp16")]; tensor k_cache_87_begin_0 = const()[name = string("k_cache_87_begin_0"), val = tensor([21, 0, 0, 0])]; tensor k_cache_87_end_0 = const()[name = string("k_cache_87_end_0"), val = tensor([22, 1, 1500, 1280])]; tensor k_cache_87_end_mask_0 = const()[name = string("k_cache_87_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_87_squeeze_mask_0 = const()[name = string("k_cache_87_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_87_cast_fp16 = slice_by_index(begin = k_cache_87_begin_0, end = k_cache_87_end_0, end_mask = k_cache_87_end_mask_0, squeeze_mask = k_cache_87_squeeze_mask_0, x = read_state_2)[name = string("k_cache_87_cast_fp16")]; tensor v_cache_87_begin_0 = const()[name = string("v_cache_87_begin_0"), val = tensor([21, 0, 0, 0])]; tensor v_cache_87_end_0 = const()[name = string("v_cache_87_end_0"), val = tensor([22, 1, 1500, 1280])]; tensor v_cache_87_end_mask_0 = const()[name = string("v_cache_87_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_87_squeeze_mask_0 = const()[name = string("v_cache_87_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_87_cast_fp16 = slice_by_index(begin = v_cache_87_begin_0, end = v_cache_87_end_0, end_mask = v_cache_87_end_mask_0, squeeze_mask = v_cache_87_squeeze_mask_0, x = read_state_3)[name = string("v_cache_87_cast_fp16")]; int32 var_4611 = const()[name = string("op_4611"), val = int32(-1)]; tensor var_4629_axes_0 = const()[name = string("op_4629_axes_0"), val = tensor([-1])]; tensor blocks_21_attn_ln_weight_to_fp16 = const()[name = string("blocks_21_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1102435776)))]; tensor blocks_21_attn_ln_bias_to_fp16 = const()[name = string("blocks_21_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1102438400)))]; fp16 var_4617_to_fp16 = const()[name = string("op_4617_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_4629_cast_fp16 = layer_norm(axes = var_4629_axes_0, beta = blocks_21_attn_ln_bias_to_fp16, epsilon = var_4617_to_fp16, gamma = blocks_21_attn_ln_weight_to_fp16, x = x_381_cast_fp16)[name = string("op_4629_cast_fp16")]; tensor var_4640_to_fp16 = const()[name = string("op_4640_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1102441024)))]; tensor var_4641_to_fp16 = const()[name = string("op_4641_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1105717888)))]; tensor linear_168_cast_fp16 = linear(bias = var_4641_to_fp16, weight = var_4640_to_fp16, x = var_4629_cast_fp16)[name = string("linear_168_cast_fp16")]; tensor var_4644_to_fp16 = const()[name = string("op_4644_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1105720512)))]; tensor linear_169_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_4644_to_fp16, x = var_4629_cast_fp16)[name = string("linear_169_cast_fp16")]; tensor var_4648_to_fp16 = const()[name = string("op_4648_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1108997376)))]; tensor var_4649_to_fp16 = const()[name = string("op_4649_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1112274240)))]; tensor linear_170_cast_fp16 = linear(bias = var_4649_to_fp16, weight = var_4648_to_fp16, x = var_4629_cast_fp16)[name = string("linear_170_cast_fp16")]; tensor var_4651_shape_cast_fp16 = shape(x = linear_168_cast_fp16)[name = string("op_4651_shape_cast_fp16")]; int32 gather_254_axis_0 = const()[name = string("gather_254_axis_0"), val = int32(0)]; int32 gather_254_batch_dims_0 = const()[name = string("gather_254_batch_dims_0"), val = int32(0)]; bool gather_254_validate_indices_0 = const()[name = string("gather_254_validate_indices_0"), val = bool(false)]; string var_4651_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4651_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_254_to_uint16 = const()[name = string("select_254_to_uint16"), val = uint16(1)]; tensor var_4651_shape_cast_fp16_to_uint16 = cast(dtype = var_4651_shape_cast_fp16_to_uint16_dtype_0, x = var_4651_shape_cast_fp16)[name = string("cast_348")]; uint16 gather_254_cast_uint16 = gather(axis = gather_254_axis_0, batch_dims = gather_254_batch_dims_0, indices = select_254_to_uint16, validate_indices = gather_254_validate_indices_0, x = var_4651_shape_cast_fp16_to_uint16)[name = string("gather_254_cast_uint16")]; string gather_254_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_254_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_254_cast_uint16_to_int32 = cast(dtype = gather_254_cast_uint16_to_int32_dtype_0, x = gather_254_cast_uint16)[name = string("cast_347")]; int32 end_step_45 = add(x = offset, y = gather_254_cast_uint16_to_int32)[name = string("end_step_45")]; tensor expand_dims_336 = const()[name = string("expand_dims_336"), val = tensor([0])]; tensor expand_dims_338 = const()[name = string("expand_dims_338"), val = tensor([0])]; tensor expand_dims_339_axes_0 = const()[name = string("expand_dims_339_axes_0"), val = tensor([0])]; tensor expand_dims_339 = expand_dims(axes = expand_dims_339_axes_0, x = end_step_45)[name = string("expand_dims_339")]; tensor concat_466_values0_0 = const()[name = string("concat_466_values0_0"), val = tensor([21])]; int32 concat_466_axis_0 = const()[name = string("concat_466_axis_0"), val = int32(0)]; bool concat_466_interleave_0 = const()[name = string("concat_466_interleave_0"), val = bool(false)]; tensor concat_466 = concat(axis = concat_466_axis_0, interleave = concat_466_interleave_0, values = (concat_466_values0_0, expand_dims_336, expand_dims_1, expand_dims_338))[name = string("concat_466")]; tensor concat_467_values0_0 = const()[name = string("concat_467_values0_0"), val = tensor([0])]; tensor concat_467_values1_0 = const()[name = string("concat_467_values1_0"), val = tensor([0])]; tensor concat_467_values3_0 = const()[name = string("concat_467_values3_0"), val = tensor([0])]; int32 concat_467_axis_0 = const()[name = string("concat_467_axis_0"), val = int32(0)]; bool concat_467_interleave_0 = const()[name = string("concat_467_interleave_0"), val = bool(false)]; tensor concat_467 = concat(axis = concat_467_axis_0, interleave = concat_467_interleave_0, values = (concat_467_values0_0, concat_467_values1_0, expand_dims_339, concat_467_values3_0))[name = string("concat_467")]; tensor k_cache1_internal_tensor_assign_22_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_22_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_22_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_466, begin_mask = k_cache1_internal_tensor_assign_22_begin_mask_0, end = concat_467, end_mask = k_cache1_internal_tensor_assign_22_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_22_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_22_stride_0, update = linear_169_cast_fp16, x = coreml_update_state_104)[name = string("k_cache1_internal_tensor_assign_22_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_22_cast_fp16, input = k_cache1)[name = string("coreml_update_state_106_write_state")]; tensor coreml_update_state_106 = read_state(input = k_cache1)[name = string("coreml_update_state_106")]; tensor v_cache1_internal_tensor_assign_22_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_22_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_22_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_466, begin_mask = v_cache1_internal_tensor_assign_22_begin_mask_0, end = concat_467, end_mask = v_cache1_internal_tensor_assign_22_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_22_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_22_stride_0, update = linear_170_cast_fp16, x = coreml_update_state_105)[name = string("v_cache1_internal_tensor_assign_22_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_22_cast_fp16, input = v_cache1)[name = string("coreml_update_state_107_write_state")]; tensor coreml_update_state_107 = read_state(input = v_cache1)[name = string("coreml_update_state_107")]; int32 concat_472_values0_0 = const()[name = string("concat_472_values0_0"), val = int32(1)]; int32 concat_472_values2_0 = const()[name = string("concat_472_values2_0"), val = int32(1280)]; int32 concat_472_axis_0 = const()[name = string("concat_472_axis_0"), val = int32(0)]; bool concat_472_interleave_0 = const()[name = string("concat_472_interleave_0"), val = bool(false)]; tensor concat_472 = concat(axis = concat_472_axis_0, interleave = concat_472_interleave_0, values = (concat_472_values0_0, end_step_45, concat_472_values2_0))[name = string("concat_472")]; tensor var_4667_begin_0 = const()[name = string("op_4667_begin_0"), val = tensor([0, 0, 0])]; tensor var_4667_end_mask_0 = const()[name = string("op_4667_end_mask_0"), val = tensor([true, false, true])]; tensor var_4667_cast_fp16 = slice_by_index(begin = var_4667_begin_0, end = concat_472, end_mask = var_4667_end_mask_0, x = k_cache_85_cast_fp16)[name = string("op_4667_cast_fp16")]; tensor var_4670_begin_0 = const()[name = string("op_4670_begin_0"), val = tensor([0, 0, 0])]; tensor var_4670_end_mask_0 = const()[name = string("op_4670_end_mask_0"), val = tensor([true, false, true])]; tensor var_4670_cast_fp16 = slice_by_index(begin = var_4670_begin_0, end = concat_472, end_mask = var_4670_end_mask_0, x = v_cache_85_cast_fp16)[name = string("op_4670_cast_fp16")]; tensor concat_474x = const()[name = string("concat_474x"), val = tensor([1, -1, 20, 64])]; tensor var_4680_cast_fp16 = reshape(shape = concat_474x, x = linear_168_cast_fp16)[name = string("op_4680_cast_fp16")]; tensor const_244_to_fp16 = const()[name = string("const_244_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_171_cast_fp16 = mul(x = var_4680_cast_fp16, y = const_244_to_fp16)[name = string("q_171_cast_fp16")]; tensor concat_475x = const()[name = string("concat_475x"), val = tensor([1, -1, 20, 64])]; tensor var_4687_cast_fp16 = reshape(shape = concat_475x, x = var_4667_cast_fp16)[name = string("op_4687_cast_fp16")]; tensor const_245_to_fp16 = const()[name = string("const_245_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_215_cast_fp16 = mul(x = var_4687_cast_fp16, y = const_245_to_fp16)[name = string("k_215_cast_fp16")]; tensor concat_476x = const()[name = string("concat_476x"), val = tensor([1, -1, 20, 64])]; tensor var_4694_cast_fp16 = reshape(shape = concat_476x, x = var_4670_cast_fp16)[name = string("op_4694_cast_fp16")]; tensor var_4695 = const()[name = string("op_4695"), val = tensor([0, 2, 1, 3])]; bool qk_127_transpose_x_0 = const()[name = string("qk_127_transpose_x_0"), val = bool(false)]; bool qk_127_transpose_y_0 = const()[name = string("qk_127_transpose_y_0"), val = bool(false)]; tensor transpose_341_perm_0 = const()[name = string("transpose_341_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_342_perm_0 = const()[name = string("transpose_342_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_342 = transpose(perm = transpose_342_perm_0, x = k_215_cast_fp16)[name = string("transpose_470")]; tensor transpose_341 = transpose(perm = transpose_341_perm_0, x = q_171_cast_fp16)[name = string("transpose_471")]; tensor qk_127_cast_fp16 = matmul(transpose_x = qk_127_transpose_x_0, transpose_y = qk_127_transpose_y_0, x = transpose_341, y = transpose_342)[name = string("qk_127_cast_fp16")]; int32 concat_477_values1_0 = const()[name = string("concat_477_values1_0"), val = int32(448)]; int32 concat_477_axis_0 = const()[name = string("concat_477_axis_0"), val = int32(0)]; bool concat_477_interleave_0 = const()[name = string("concat_477_interleave_0"), val = bool(false)]; tensor concat_477 = concat(axis = concat_477_axis_0, interleave = concat_477_interleave_0, values = (gather_254_cast_uint16_to_int32, concat_477_values1_0))[name = string("concat_477")]; tensor var_4698_begin_0 = const()[name = string("op_4698_begin_0"), val = tensor([0, 0])]; tensor var_4698_end_mask_0 = const()[name = string("op_4698_end_mask_0"), val = tensor([false, true])]; tensor var_4698_cast_fp16 = slice_by_index(begin = var_4698_begin_0, end = concat_477, end_mask = var_4698_end_mask_0, x = mask_to_fp16)[name = string("op_4698_cast_fp16")]; int32 concat_478_values0_0 = const()[name = string("concat_478_values0_0"), val = int32(0)]; int32 concat_478_axis_0 = const()[name = string("concat_478_axis_0"), val = int32(0)]; bool concat_478_interleave_0 = const()[name = string("concat_478_interleave_0"), val = bool(false)]; tensor concat_478 = concat(axis = concat_478_axis_0, interleave = concat_478_interleave_0, values = (concat_478_values0_0, gather_254_cast_uint16_to_int32))[name = string("concat_478")]; tensor var_4699_begin_0 = const()[name = string("op_4699_begin_0"), val = tensor([0, 0])]; tensor var_4699_end_mask_0 = const()[name = string("op_4699_end_mask_0"), val = tensor([true, false])]; tensor var_4699_cast_fp16 = slice_by_index(begin = var_4699_begin_0, end = concat_478, end_mask = var_4699_end_mask_0, x = var_4698_cast_fp16)[name = string("op_4699_cast_fp16")]; tensor qk_129_cast_fp16 = add(x = qk_127_cast_fp16, y = var_4699_cast_fp16)[name = string("qk_129_cast_fp16")]; tensor var_4702_cast_fp16 = softmax(axis = var_4611, x = qk_129_cast_fp16)[name = string("op_4702_cast_fp16")]; bool var_4704_transpose_x_0 = const()[name = string("op_4704_transpose_x_0"), val = bool(false)]; bool var_4704_transpose_y_0 = const()[name = string("op_4704_transpose_y_0"), val = bool(false)]; tensor v_215_cast_fp16 = transpose(perm = var_4695, x = var_4694_cast_fp16)[name = string("transpose_472")]; tensor var_4704_cast_fp16 = matmul(transpose_x = var_4704_transpose_x_0, transpose_y = var_4704_transpose_y_0, x = var_4702_cast_fp16, y = v_215_cast_fp16)[name = string("op_4704_cast_fp16")]; tensor var_4705 = const()[name = string("op_4705"), val = tensor([0, 2, 1, 3])]; tensor concat_479x = const()[name = string("concat_479x"), val = tensor([1, -1, 1280])]; tensor var_4706_cast_fp16 = transpose(perm = var_4705, x = var_4704_cast_fp16)[name = string("transpose_469")]; tensor x_385_cast_fp16 = reshape(shape = concat_479x, x = var_4706_cast_fp16)[name = string("x_385_cast_fp16")]; tensor var_4710_to_fp16 = const()[name = string("op_4710_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1112276864)))]; tensor var_4711_to_fp16 = const()[name = string("op_4711_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1115553728)))]; tensor linear_171_cast_fp16 = linear(bias = var_4711_to_fp16, weight = var_4710_to_fp16, x = x_385_cast_fp16)[name = string("linear_171_cast_fp16")]; tensor x_387_cast_fp16 = add(x = x_381_cast_fp16, y = linear_171_cast_fp16)[name = string("x_387_cast_fp16")]; tensor var_4718_axes_0 = const()[name = string("op_4718_axes_0"), val = tensor([-1])]; tensor blocks_21_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_21_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1115556352)))]; tensor blocks_21_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_21_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1115558976)))]; tensor var_4718_cast_fp16 = layer_norm(axes = var_4718_axes_0, beta = blocks_21_cross_attn_ln_bias_to_fp16, epsilon = var_4617_to_fp16, gamma = blocks_21_cross_attn_ln_weight_to_fp16, x = x_387_cast_fp16)[name = string("op_4718_cast_fp16")]; tensor var_4727_to_fp16 = const()[name = string("op_4727_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1115561600)))]; tensor var_4728_to_fp16 = const()[name = string("op_4728_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1118838464)))]; tensor linear_172_cast_fp16 = linear(bias = var_4728_to_fp16, weight = var_4727_to_fp16, x = var_4718_cast_fp16)[name = string("linear_172_cast_fp16")]; tensor concat_480 = const()[name = string("concat_480"), val = tensor([0, 0, 0])]; tensor concat_481 = const()[name = string("concat_481"), val = tensor([0, 1500, 0])]; tensor k_217_internal_tensor_assign_1_stride_0 = const()[name = string("k_217_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_217_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_217_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_217_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_217_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_217_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_217_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_217_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_480, begin_mask = k_217_internal_tensor_assign_1_begin_mask_0, end = concat_481, end_mask = k_217_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_217_internal_tensor_assign_1_squeeze_mask_0, stride = k_217_internal_tensor_assign_1_stride_0, update = k_cache_87_cast_fp16, x = k_7_to_fp16)[name = string("k_217_internal_tensor_assign_1_cast_fp16")]; tensor concat_482 = const()[name = string("concat_482"), val = tensor([0, 0, 0])]; tensor concat_483 = const()[name = string("concat_483"), val = tensor([0, 1500, 0])]; tensor v_217_internal_tensor_assign_1_stride_0 = const()[name = string("v_217_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_217_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_217_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_217_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_217_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_217_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_217_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_217_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_482, begin_mask = v_217_internal_tensor_assign_1_begin_mask_0, end = concat_483, end_mask = v_217_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_217_internal_tensor_assign_1_squeeze_mask_0, stride = v_217_internal_tensor_assign_1_stride_0, update = v_cache_87_cast_fp16, x = k_7_to_fp16)[name = string("v_217_internal_tensor_assign_1_cast_fp16")]; tensor concat_484x = const()[name = string("concat_484x"), val = tensor([1, -1, 20, 64])]; tensor var_4748_cast_fp16 = reshape(shape = concat_484x, x = linear_172_cast_fp16)[name = string("op_4748_cast_fp16")]; tensor const_246_to_fp16 = const()[name = string("const_246_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_175_cast_fp16 = mul(x = var_4748_cast_fp16, y = const_246_to_fp16)[name = string("q_175_cast_fp16")]; tensor var_4754 = const()[name = string("op_4754"), val = tensor([1, 1500, 20, -1])]; tensor var_4755_cast_fp16 = reshape(shape = var_4754, x = k_217_internal_tensor_assign_1_cast_fp16)[name = string("op_4755_cast_fp16")]; tensor const_247_to_fp16 = const()[name = string("const_247_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_219_cast_fp16 = mul(x = var_4755_cast_fp16, y = const_247_to_fp16)[name = string("k_219_cast_fp16")]; tensor var_4761 = const()[name = string("op_4761"), val = tensor([1, 1500, 20, -1])]; tensor var_4762_cast_fp16 = reshape(shape = var_4761, x = v_217_internal_tensor_assign_1_cast_fp16)[name = string("op_4762_cast_fp16")]; tensor var_4763 = const()[name = string("op_4763"), val = tensor([0, 2, 1, 3])]; bool qk_131_transpose_x_0 = const()[name = string("qk_131_transpose_x_0"), val = bool(false)]; bool qk_131_transpose_y_0 = const()[name = string("qk_131_transpose_y_0"), val = bool(false)]; tensor transpose_343_perm_0 = const()[name = string("transpose_343_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_344_perm_0 = const()[name = string("transpose_344_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_344 = transpose(perm = transpose_344_perm_0, x = k_219_cast_fp16)[name = string("transpose_466")]; tensor transpose_343 = transpose(perm = transpose_343_perm_0, x = q_175_cast_fp16)[name = string("transpose_467")]; tensor qk_131_cast_fp16 = matmul(transpose_x = qk_131_transpose_x_0, transpose_y = qk_131_transpose_y_0, x = transpose_343, y = transpose_344)[name = string("qk_131_cast_fp16")]; tensor var_4767_cast_fp16 = softmax(axis = var_4611, x = qk_131_cast_fp16)[name = string("op_4767_cast_fp16")]; bool var_4769_transpose_x_0 = const()[name = string("op_4769_transpose_x_0"), val = bool(false)]; bool var_4769_transpose_y_0 = const()[name = string("op_4769_transpose_y_0"), val = bool(false)]; tensor v_219_cast_fp16 = transpose(perm = var_4763, x = var_4762_cast_fp16)[name = string("transpose_468")]; tensor var_4769_cast_fp16 = matmul(transpose_x = var_4769_transpose_x_0, transpose_y = var_4769_transpose_y_0, x = var_4767_cast_fp16, y = v_219_cast_fp16)[name = string("op_4769_cast_fp16")]; tensor var_4770 = const()[name = string("op_4770"), val = tensor([0, 2, 1, 3])]; tensor concat_485x = const()[name = string("concat_485x"), val = tensor([1, -1, 1280])]; tensor var_4771_cast_fp16 = transpose(perm = var_4770, x = var_4769_cast_fp16)[name = string("transpose_465")]; tensor x_391_cast_fp16 = reshape(shape = concat_485x, x = var_4771_cast_fp16)[name = string("x_391_cast_fp16")]; tensor var_4775_to_fp16 = const()[name = string("op_4775_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1118841088)))]; tensor var_4776_to_fp16 = const()[name = string("op_4776_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1122117952)))]; tensor linear_173_cast_fp16 = linear(bias = var_4776_to_fp16, weight = var_4775_to_fp16, x = x_391_cast_fp16)[name = string("linear_173_cast_fp16")]; tensor x_393_cast_fp16 = add(x = x_387_cast_fp16, y = linear_173_cast_fp16)[name = string("x_393_cast_fp16")]; tensor var_4783_axes_0 = const()[name = string("op_4783_axes_0"), val = tensor([-1])]; tensor blocks_21_mlp_ln_weight_to_fp16 = const()[name = string("blocks_21_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1122120576)))]; tensor blocks_21_mlp_ln_bias_to_fp16 = const()[name = string("blocks_21_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1122123200)))]; tensor var_4783_cast_fp16 = layer_norm(axes = var_4783_axes_0, beta = blocks_21_mlp_ln_bias_to_fp16, epsilon = var_4617_to_fp16, gamma = blocks_21_mlp_ln_weight_to_fp16, x = x_393_cast_fp16)[name = string("op_4783_cast_fp16")]; tensor var_4792_to_fp16 = const()[name = string("op_4792_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1122125824)))]; tensor var_4793_to_fp16 = const()[name = string("op_4793_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1135233088)))]; tensor linear_174_cast_fp16 = linear(bias = var_4793_to_fp16, weight = var_4792_to_fp16, x = var_4783_cast_fp16)[name = string("linear_174_cast_fp16")]; string x_397_mode_0 = const()[name = string("x_397_mode_0"), val = string("EXACT")]; tensor x_397_cast_fp16 = gelu(mode = x_397_mode_0, x = linear_174_cast_fp16)[name = string("x_397_cast_fp16")]; tensor var_4798_to_fp16 = const()[name = string("op_4798_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1135243392)))]; tensor var_4799_to_fp16 = const()[name = string("op_4799_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1148350656)))]; tensor linear_175_cast_fp16 = linear(bias = var_4799_to_fp16, weight = var_4798_to_fp16, x = x_397_cast_fp16)[name = string("linear_175_cast_fp16")]; tensor x_399_cast_fp16 = add(x = x_393_cast_fp16, y = linear_175_cast_fp16)[name = string("x_399_cast_fp16")]; tensor k_cache_89_begin_0 = const()[name = string("k_cache_89_begin_0"), val = tensor([22, 0, 0, 0])]; tensor k_cache_89_end_0 = const()[name = string("k_cache_89_end_0"), val = tensor([23, 1, 448, 1280])]; tensor k_cache_89_end_mask_0 = const()[name = string("k_cache_89_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_89_squeeze_mask_0 = const()[name = string("k_cache_89_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_89_cast_fp16 = slice_by_index(begin = k_cache_89_begin_0, end = k_cache_89_end_0, end_mask = k_cache_89_end_mask_0, squeeze_mask = k_cache_89_squeeze_mask_0, x = coreml_update_state_106)[name = string("k_cache_89_cast_fp16")]; tensor v_cache_89_begin_0 = const()[name = string("v_cache_89_begin_0"), val = tensor([22, 0, 0, 0])]; tensor v_cache_89_end_0 = const()[name = string("v_cache_89_end_0"), val = tensor([23, 1, 448, 1280])]; tensor v_cache_89_end_mask_0 = const()[name = string("v_cache_89_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_89_squeeze_mask_0 = const()[name = string("v_cache_89_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_89_cast_fp16 = slice_by_index(begin = v_cache_89_begin_0, end = v_cache_89_end_0, end_mask = v_cache_89_end_mask_0, squeeze_mask = v_cache_89_squeeze_mask_0, x = coreml_update_state_107)[name = string("v_cache_89_cast_fp16")]; tensor k_cache_91_begin_0 = const()[name = string("k_cache_91_begin_0"), val = tensor([22, 0, 0, 0])]; tensor k_cache_91_end_0 = const()[name = string("k_cache_91_end_0"), val = tensor([23, 1, 1500, 1280])]; tensor k_cache_91_end_mask_0 = const()[name = string("k_cache_91_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_91_squeeze_mask_0 = const()[name = string("k_cache_91_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_91_cast_fp16 = slice_by_index(begin = k_cache_91_begin_0, end = k_cache_91_end_0, end_mask = k_cache_91_end_mask_0, squeeze_mask = k_cache_91_squeeze_mask_0, x = read_state_2)[name = string("k_cache_91_cast_fp16")]; tensor v_cache_91_begin_0 = const()[name = string("v_cache_91_begin_0"), val = tensor([22, 0, 0, 0])]; tensor v_cache_91_end_0 = const()[name = string("v_cache_91_end_0"), val = tensor([23, 1, 1500, 1280])]; tensor v_cache_91_end_mask_0 = const()[name = string("v_cache_91_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_91_squeeze_mask_0 = const()[name = string("v_cache_91_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_91_cast_fp16 = slice_by_index(begin = v_cache_91_begin_0, end = v_cache_91_end_0, end_mask = v_cache_91_end_mask_0, squeeze_mask = v_cache_91_squeeze_mask_0, x = read_state_3)[name = string("v_cache_91_cast_fp16")]; int32 var_4822 = const()[name = string("op_4822"), val = int32(-1)]; tensor var_4840_axes_0 = const()[name = string("op_4840_axes_0"), val = tensor([-1])]; tensor blocks_22_attn_ln_weight_to_fp16 = const()[name = string("blocks_22_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1148353280)))]; tensor blocks_22_attn_ln_bias_to_fp16 = const()[name = string("blocks_22_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1148355904)))]; fp16 var_4828_to_fp16 = const()[name = string("op_4828_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_4840_cast_fp16 = layer_norm(axes = var_4840_axes_0, beta = blocks_22_attn_ln_bias_to_fp16, epsilon = var_4828_to_fp16, gamma = blocks_22_attn_ln_weight_to_fp16, x = x_399_cast_fp16)[name = string("op_4840_cast_fp16")]; tensor var_4851_to_fp16 = const()[name = string("op_4851_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1148358528)))]; tensor var_4852_to_fp16 = const()[name = string("op_4852_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1151635392)))]; tensor linear_176_cast_fp16 = linear(bias = var_4852_to_fp16, weight = var_4851_to_fp16, x = var_4840_cast_fp16)[name = string("linear_176_cast_fp16")]; tensor var_4855_to_fp16 = const()[name = string("op_4855_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1151638016)))]; tensor linear_177_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_4855_to_fp16, x = var_4840_cast_fp16)[name = string("linear_177_cast_fp16")]; tensor var_4859_to_fp16 = const()[name = string("op_4859_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1154914880)))]; tensor var_4860_to_fp16 = const()[name = string("op_4860_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1158191744)))]; tensor linear_178_cast_fp16 = linear(bias = var_4860_to_fp16, weight = var_4859_to_fp16, x = var_4840_cast_fp16)[name = string("linear_178_cast_fp16")]; tensor var_4862_shape_cast_fp16 = shape(x = linear_176_cast_fp16)[name = string("op_4862_shape_cast_fp16")]; int32 gather_266_axis_0 = const()[name = string("gather_266_axis_0"), val = int32(0)]; int32 gather_266_batch_dims_0 = const()[name = string("gather_266_batch_dims_0"), val = int32(0)]; bool gather_266_validate_indices_0 = const()[name = string("gather_266_validate_indices_0"), val = bool(false)]; string var_4862_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4862_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_266_to_uint16 = const()[name = string("select_266_to_uint16"), val = uint16(1)]; tensor var_4862_shape_cast_fp16_to_uint16 = cast(dtype = var_4862_shape_cast_fp16_to_uint16_dtype_0, x = var_4862_shape_cast_fp16)[name = string("cast_346")]; uint16 gather_266_cast_uint16 = gather(axis = gather_266_axis_0, batch_dims = gather_266_batch_dims_0, indices = select_266_to_uint16, validate_indices = gather_266_validate_indices_0, x = var_4862_shape_cast_fp16_to_uint16)[name = string("gather_266_cast_uint16")]; string gather_266_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_266_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_266_cast_uint16_to_int32 = cast(dtype = gather_266_cast_uint16_to_int32_dtype_0, x = gather_266_cast_uint16)[name = string("cast_345")]; int32 end_step_47 = add(x = offset, y = gather_266_cast_uint16_to_int32)[name = string("end_step_47")]; tensor expand_dims_352 = const()[name = string("expand_dims_352"), val = tensor([0])]; tensor expand_dims_354 = const()[name = string("expand_dims_354"), val = tensor([0])]; tensor expand_dims_355_axes_0 = const()[name = string("expand_dims_355_axes_0"), val = tensor([0])]; tensor expand_dims_355 = expand_dims(axes = expand_dims_355_axes_0, x = end_step_47)[name = string("expand_dims_355")]; tensor concat_488_values0_0 = const()[name = string("concat_488_values0_0"), val = tensor([22])]; int32 concat_488_axis_0 = const()[name = string("concat_488_axis_0"), val = int32(0)]; bool concat_488_interleave_0 = const()[name = string("concat_488_interleave_0"), val = bool(false)]; tensor concat_488 = concat(axis = concat_488_axis_0, interleave = concat_488_interleave_0, values = (concat_488_values0_0, expand_dims_352, expand_dims_1, expand_dims_354))[name = string("concat_488")]; tensor concat_489_values0_0 = const()[name = string("concat_489_values0_0"), val = tensor([0])]; tensor concat_489_values1_0 = const()[name = string("concat_489_values1_0"), val = tensor([0])]; tensor concat_489_values3_0 = const()[name = string("concat_489_values3_0"), val = tensor([0])]; int32 concat_489_axis_0 = const()[name = string("concat_489_axis_0"), val = int32(0)]; bool concat_489_interleave_0 = const()[name = string("concat_489_interleave_0"), val = bool(false)]; tensor concat_489 = concat(axis = concat_489_axis_0, interleave = concat_489_interleave_0, values = (concat_489_values0_0, concat_489_values1_0, expand_dims_355, concat_489_values3_0))[name = string("concat_489")]; tensor k_cache1_internal_tensor_assign_23_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_23_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_23_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_488, begin_mask = k_cache1_internal_tensor_assign_23_begin_mask_0, end = concat_489, end_mask = k_cache1_internal_tensor_assign_23_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_23_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_23_stride_0, update = linear_177_cast_fp16, x = coreml_update_state_106)[name = string("k_cache1_internal_tensor_assign_23_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_23_cast_fp16, input = k_cache1)[name = string("coreml_update_state_108_write_state")]; tensor coreml_update_state_108 = read_state(input = k_cache1)[name = string("coreml_update_state_108")]; tensor v_cache1_internal_tensor_assign_23_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_23_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_23_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_488, begin_mask = v_cache1_internal_tensor_assign_23_begin_mask_0, end = concat_489, end_mask = v_cache1_internal_tensor_assign_23_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_23_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_23_stride_0, update = linear_178_cast_fp16, x = coreml_update_state_107)[name = string("v_cache1_internal_tensor_assign_23_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_23_cast_fp16, input = v_cache1)[name = string("coreml_update_state_109_write_state")]; tensor coreml_update_state_109 = read_state(input = v_cache1)[name = string("coreml_update_state_109")]; int32 concat_494_values0_0 = const()[name = string("concat_494_values0_0"), val = int32(1)]; int32 concat_494_values2_0 = const()[name = string("concat_494_values2_0"), val = int32(1280)]; int32 concat_494_axis_0 = const()[name = string("concat_494_axis_0"), val = int32(0)]; bool concat_494_interleave_0 = const()[name = string("concat_494_interleave_0"), val = bool(false)]; tensor concat_494 = concat(axis = concat_494_axis_0, interleave = concat_494_interleave_0, values = (concat_494_values0_0, end_step_47, concat_494_values2_0))[name = string("concat_494")]; tensor var_4878_begin_0 = const()[name = string("op_4878_begin_0"), val = tensor([0, 0, 0])]; tensor var_4878_end_mask_0 = const()[name = string("op_4878_end_mask_0"), val = tensor([true, false, true])]; tensor var_4878_cast_fp16 = slice_by_index(begin = var_4878_begin_0, end = concat_494, end_mask = var_4878_end_mask_0, x = k_cache_89_cast_fp16)[name = string("op_4878_cast_fp16")]; tensor var_4881_begin_0 = const()[name = string("op_4881_begin_0"), val = tensor([0, 0, 0])]; tensor var_4881_end_mask_0 = const()[name = string("op_4881_end_mask_0"), val = tensor([true, false, true])]; tensor var_4881_cast_fp16 = slice_by_index(begin = var_4881_begin_0, end = concat_494, end_mask = var_4881_end_mask_0, x = v_cache_89_cast_fp16)[name = string("op_4881_cast_fp16")]; tensor concat_496x = const()[name = string("concat_496x"), val = tensor([1, -1, 20, 64])]; tensor var_4891_cast_fp16 = reshape(shape = concat_496x, x = linear_176_cast_fp16)[name = string("op_4891_cast_fp16")]; tensor const_248_to_fp16 = const()[name = string("const_248_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_179_cast_fp16 = mul(x = var_4891_cast_fp16, y = const_248_to_fp16)[name = string("q_179_cast_fp16")]; tensor concat_497x = const()[name = string("concat_497x"), val = tensor([1, -1, 20, 64])]; tensor var_4898_cast_fp16 = reshape(shape = concat_497x, x = var_4878_cast_fp16)[name = string("op_4898_cast_fp16")]; tensor const_249_to_fp16 = const()[name = string("const_249_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_225_cast_fp16 = mul(x = var_4898_cast_fp16, y = const_249_to_fp16)[name = string("k_225_cast_fp16")]; tensor concat_498x = const()[name = string("concat_498x"), val = tensor([1, -1, 20, 64])]; tensor var_4905_cast_fp16 = reshape(shape = concat_498x, x = var_4881_cast_fp16)[name = string("op_4905_cast_fp16")]; tensor var_4906 = const()[name = string("op_4906"), val = tensor([0, 2, 1, 3])]; bool qk_133_transpose_x_0 = const()[name = string("qk_133_transpose_x_0"), val = bool(false)]; bool qk_133_transpose_y_0 = const()[name = string("qk_133_transpose_y_0"), val = bool(false)]; tensor transpose_345_perm_0 = const()[name = string("transpose_345_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_346_perm_0 = const()[name = string("transpose_346_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_346 = transpose(perm = transpose_346_perm_0, x = k_225_cast_fp16)[name = string("transpose_462")]; tensor transpose_345 = transpose(perm = transpose_345_perm_0, x = q_179_cast_fp16)[name = string("transpose_463")]; tensor qk_133_cast_fp16 = matmul(transpose_x = qk_133_transpose_x_0, transpose_y = qk_133_transpose_y_0, x = transpose_345, y = transpose_346)[name = string("qk_133_cast_fp16")]; int32 concat_499_values1_0 = const()[name = string("concat_499_values1_0"), val = int32(448)]; int32 concat_499_axis_0 = const()[name = string("concat_499_axis_0"), val = int32(0)]; bool concat_499_interleave_0 = const()[name = string("concat_499_interleave_0"), val = bool(false)]; tensor concat_499 = concat(axis = concat_499_axis_0, interleave = concat_499_interleave_0, values = (gather_266_cast_uint16_to_int32, concat_499_values1_0))[name = string("concat_499")]; tensor var_4909_begin_0 = const()[name = string("op_4909_begin_0"), val = tensor([0, 0])]; tensor var_4909_end_mask_0 = const()[name = string("op_4909_end_mask_0"), val = tensor([false, true])]; tensor var_4909_cast_fp16 = slice_by_index(begin = var_4909_begin_0, end = concat_499, end_mask = var_4909_end_mask_0, x = mask_to_fp16)[name = string("op_4909_cast_fp16")]; int32 concat_500_values0_0 = const()[name = string("concat_500_values0_0"), val = int32(0)]; int32 concat_500_axis_0 = const()[name = string("concat_500_axis_0"), val = int32(0)]; bool concat_500_interleave_0 = const()[name = string("concat_500_interleave_0"), val = bool(false)]; tensor concat_500 = concat(axis = concat_500_axis_0, interleave = concat_500_interleave_0, values = (concat_500_values0_0, gather_266_cast_uint16_to_int32))[name = string("concat_500")]; tensor var_4910_begin_0 = const()[name = string("op_4910_begin_0"), val = tensor([0, 0])]; tensor var_4910_end_mask_0 = const()[name = string("op_4910_end_mask_0"), val = tensor([true, false])]; tensor var_4910_cast_fp16 = slice_by_index(begin = var_4910_begin_0, end = concat_500, end_mask = var_4910_end_mask_0, x = var_4909_cast_fp16)[name = string("op_4910_cast_fp16")]; tensor qk_135_cast_fp16 = add(x = qk_133_cast_fp16, y = var_4910_cast_fp16)[name = string("qk_135_cast_fp16")]; tensor var_4913_cast_fp16 = softmax(axis = var_4822, x = qk_135_cast_fp16)[name = string("op_4913_cast_fp16")]; bool var_4915_transpose_x_0 = const()[name = string("op_4915_transpose_x_0"), val = bool(false)]; bool var_4915_transpose_y_0 = const()[name = string("op_4915_transpose_y_0"), val = bool(false)]; tensor v_225_cast_fp16 = transpose(perm = var_4906, x = var_4905_cast_fp16)[name = string("transpose_464")]; tensor var_4915_cast_fp16 = matmul(transpose_x = var_4915_transpose_x_0, transpose_y = var_4915_transpose_y_0, x = var_4913_cast_fp16, y = v_225_cast_fp16)[name = string("op_4915_cast_fp16")]; tensor var_4916 = const()[name = string("op_4916"), val = tensor([0, 2, 1, 3])]; tensor concat_501x = const()[name = string("concat_501x"), val = tensor([1, -1, 1280])]; tensor var_4917_cast_fp16 = transpose(perm = var_4916, x = var_4915_cast_fp16)[name = string("transpose_461")]; tensor x_403_cast_fp16 = reshape(shape = concat_501x, x = var_4917_cast_fp16)[name = string("x_403_cast_fp16")]; tensor var_4921_to_fp16 = const()[name = string("op_4921_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1158194368)))]; tensor var_4922_to_fp16 = const()[name = string("op_4922_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1161471232)))]; tensor linear_179_cast_fp16 = linear(bias = var_4922_to_fp16, weight = var_4921_to_fp16, x = x_403_cast_fp16)[name = string("linear_179_cast_fp16")]; tensor x_405_cast_fp16 = add(x = x_399_cast_fp16, y = linear_179_cast_fp16)[name = string("x_405_cast_fp16")]; tensor var_4929_axes_0 = const()[name = string("op_4929_axes_0"), val = tensor([-1])]; tensor blocks_22_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_22_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1161473856)))]; tensor blocks_22_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_22_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1161476480)))]; tensor var_4929_cast_fp16 = layer_norm(axes = var_4929_axes_0, beta = blocks_22_cross_attn_ln_bias_to_fp16, epsilon = var_4828_to_fp16, gamma = blocks_22_cross_attn_ln_weight_to_fp16, x = x_405_cast_fp16)[name = string("op_4929_cast_fp16")]; tensor var_4938_to_fp16 = const()[name = string("op_4938_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1161479104)))]; tensor var_4939_to_fp16 = const()[name = string("op_4939_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1164755968)))]; tensor linear_180_cast_fp16 = linear(bias = var_4939_to_fp16, weight = var_4938_to_fp16, x = var_4929_cast_fp16)[name = string("linear_180_cast_fp16")]; tensor concat_502 = const()[name = string("concat_502"), val = tensor([0, 0, 0])]; tensor concat_503 = const()[name = string("concat_503"), val = tensor([0, 1500, 0])]; tensor k_227_internal_tensor_assign_1_stride_0 = const()[name = string("k_227_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_227_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_227_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_227_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_227_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_227_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_227_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_227_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_502, begin_mask = k_227_internal_tensor_assign_1_begin_mask_0, end = concat_503, end_mask = k_227_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_227_internal_tensor_assign_1_squeeze_mask_0, stride = k_227_internal_tensor_assign_1_stride_0, update = k_cache_91_cast_fp16, x = k_7_to_fp16)[name = string("k_227_internal_tensor_assign_1_cast_fp16")]; tensor concat_504 = const()[name = string("concat_504"), val = tensor([0, 0, 0])]; tensor concat_505 = const()[name = string("concat_505"), val = tensor([0, 1500, 0])]; tensor v_227_internal_tensor_assign_1_stride_0 = const()[name = string("v_227_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_227_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_227_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_227_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_227_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_227_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_227_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_227_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_504, begin_mask = v_227_internal_tensor_assign_1_begin_mask_0, end = concat_505, end_mask = v_227_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_227_internal_tensor_assign_1_squeeze_mask_0, stride = v_227_internal_tensor_assign_1_stride_0, update = v_cache_91_cast_fp16, x = k_7_to_fp16)[name = string("v_227_internal_tensor_assign_1_cast_fp16")]; tensor concat_506x = const()[name = string("concat_506x"), val = tensor([1, -1, 20, 64])]; tensor var_4959_cast_fp16 = reshape(shape = concat_506x, x = linear_180_cast_fp16)[name = string("op_4959_cast_fp16")]; tensor const_250_to_fp16 = const()[name = string("const_250_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_183_cast_fp16 = mul(x = var_4959_cast_fp16, y = const_250_to_fp16)[name = string("q_183_cast_fp16")]; tensor var_4965 = const()[name = string("op_4965"), val = tensor([1, 1500, 20, -1])]; tensor var_4966_cast_fp16 = reshape(shape = var_4965, x = k_227_internal_tensor_assign_1_cast_fp16)[name = string("op_4966_cast_fp16")]; tensor const_251_to_fp16 = const()[name = string("const_251_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_229_cast_fp16 = mul(x = var_4966_cast_fp16, y = const_251_to_fp16)[name = string("k_229_cast_fp16")]; tensor var_4972 = const()[name = string("op_4972"), val = tensor([1, 1500, 20, -1])]; tensor var_4973_cast_fp16 = reshape(shape = var_4972, x = v_227_internal_tensor_assign_1_cast_fp16)[name = string("op_4973_cast_fp16")]; tensor var_4974 = const()[name = string("op_4974"), val = tensor([0, 2, 1, 3])]; bool qk_137_transpose_x_0 = const()[name = string("qk_137_transpose_x_0"), val = bool(false)]; bool qk_137_transpose_y_0 = const()[name = string("qk_137_transpose_y_0"), val = bool(false)]; tensor transpose_347_perm_0 = const()[name = string("transpose_347_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_348_perm_0 = const()[name = string("transpose_348_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_348 = transpose(perm = transpose_348_perm_0, x = k_229_cast_fp16)[name = string("transpose_458")]; tensor transpose_347 = transpose(perm = transpose_347_perm_0, x = q_183_cast_fp16)[name = string("transpose_459")]; tensor qk_137_cast_fp16 = matmul(transpose_x = qk_137_transpose_x_0, transpose_y = qk_137_transpose_y_0, x = transpose_347, y = transpose_348)[name = string("qk_137_cast_fp16")]; tensor var_4978_cast_fp16 = softmax(axis = var_4822, x = qk_137_cast_fp16)[name = string("op_4978_cast_fp16")]; bool var_4980_transpose_x_0 = const()[name = string("op_4980_transpose_x_0"), val = bool(false)]; bool var_4980_transpose_y_0 = const()[name = string("op_4980_transpose_y_0"), val = bool(false)]; tensor v_229_cast_fp16 = transpose(perm = var_4974, x = var_4973_cast_fp16)[name = string("transpose_460")]; tensor var_4980_cast_fp16 = matmul(transpose_x = var_4980_transpose_x_0, transpose_y = var_4980_transpose_y_0, x = var_4978_cast_fp16, y = v_229_cast_fp16)[name = string("op_4980_cast_fp16")]; tensor var_4981 = const()[name = string("op_4981"), val = tensor([0, 2, 1, 3])]; tensor concat_507x = const()[name = string("concat_507x"), val = tensor([1, -1, 1280])]; tensor var_4982_cast_fp16 = transpose(perm = var_4981, x = var_4980_cast_fp16)[name = string("transpose_457")]; tensor x_409_cast_fp16 = reshape(shape = concat_507x, x = var_4982_cast_fp16)[name = string("x_409_cast_fp16")]; tensor var_4986_to_fp16 = const()[name = string("op_4986_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1164758592)))]; tensor var_4987_to_fp16 = const()[name = string("op_4987_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1168035456)))]; tensor linear_181_cast_fp16 = linear(bias = var_4987_to_fp16, weight = var_4986_to_fp16, x = x_409_cast_fp16)[name = string("linear_181_cast_fp16")]; tensor x_411_cast_fp16 = add(x = x_405_cast_fp16, y = linear_181_cast_fp16)[name = string("x_411_cast_fp16")]; tensor var_4994_axes_0 = const()[name = string("op_4994_axes_0"), val = tensor([-1])]; tensor blocks_22_mlp_ln_weight_to_fp16 = const()[name = string("blocks_22_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1168038080)))]; tensor blocks_22_mlp_ln_bias_to_fp16 = const()[name = string("blocks_22_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1168040704)))]; tensor var_4994_cast_fp16 = layer_norm(axes = var_4994_axes_0, beta = blocks_22_mlp_ln_bias_to_fp16, epsilon = var_4828_to_fp16, gamma = blocks_22_mlp_ln_weight_to_fp16, x = x_411_cast_fp16)[name = string("op_4994_cast_fp16")]; tensor var_5003_to_fp16 = const()[name = string("op_5003_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1168043328)))]; tensor var_5004_to_fp16 = const()[name = string("op_5004_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1181150592)))]; tensor linear_182_cast_fp16 = linear(bias = var_5004_to_fp16, weight = var_5003_to_fp16, x = var_4994_cast_fp16)[name = string("linear_182_cast_fp16")]; string x_415_mode_0 = const()[name = string("x_415_mode_0"), val = string("EXACT")]; tensor x_415_cast_fp16 = gelu(mode = x_415_mode_0, x = linear_182_cast_fp16)[name = string("x_415_cast_fp16")]; tensor var_5009_to_fp16 = const()[name = string("op_5009_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1181160896)))]; tensor var_5010_to_fp16 = const()[name = string("op_5010_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1194268160)))]; tensor linear_183_cast_fp16 = linear(bias = var_5010_to_fp16, weight = var_5009_to_fp16, x = x_415_cast_fp16)[name = string("linear_183_cast_fp16")]; tensor x_417_cast_fp16 = add(x = x_411_cast_fp16, y = linear_183_cast_fp16)[name = string("x_417_cast_fp16")]; tensor k_cache_93_begin_0 = const()[name = string("k_cache_93_begin_0"), val = tensor([23, 0, 0, 0])]; tensor k_cache_93_end_0 = const()[name = string("k_cache_93_end_0"), val = tensor([24, 1, 448, 1280])]; tensor k_cache_93_end_mask_0 = const()[name = string("k_cache_93_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_93_squeeze_mask_0 = const()[name = string("k_cache_93_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_93_cast_fp16 = slice_by_index(begin = k_cache_93_begin_0, end = k_cache_93_end_0, end_mask = k_cache_93_end_mask_0, squeeze_mask = k_cache_93_squeeze_mask_0, x = coreml_update_state_108)[name = string("k_cache_93_cast_fp16")]; tensor v_cache_93_begin_0 = const()[name = string("v_cache_93_begin_0"), val = tensor([23, 0, 0, 0])]; tensor v_cache_93_end_0 = const()[name = string("v_cache_93_end_0"), val = tensor([24, 1, 448, 1280])]; tensor v_cache_93_end_mask_0 = const()[name = string("v_cache_93_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_93_squeeze_mask_0 = const()[name = string("v_cache_93_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_93_cast_fp16 = slice_by_index(begin = v_cache_93_begin_0, end = v_cache_93_end_0, end_mask = v_cache_93_end_mask_0, squeeze_mask = v_cache_93_squeeze_mask_0, x = coreml_update_state_109)[name = string("v_cache_93_cast_fp16")]; tensor k_cache_95_begin_0 = const()[name = string("k_cache_95_begin_0"), val = tensor([23, 0, 0, 0])]; tensor k_cache_95_end_0 = const()[name = string("k_cache_95_end_0"), val = tensor([24, 1, 1500, 1280])]; tensor k_cache_95_end_mask_0 = const()[name = string("k_cache_95_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_95_squeeze_mask_0 = const()[name = string("k_cache_95_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_95_cast_fp16 = slice_by_index(begin = k_cache_95_begin_0, end = k_cache_95_end_0, end_mask = k_cache_95_end_mask_0, squeeze_mask = k_cache_95_squeeze_mask_0, x = read_state_2)[name = string("k_cache_95_cast_fp16")]; tensor v_cache_95_begin_0 = const()[name = string("v_cache_95_begin_0"), val = tensor([23, 0, 0, 0])]; tensor v_cache_95_end_0 = const()[name = string("v_cache_95_end_0"), val = tensor([24, 1, 1500, 1280])]; tensor v_cache_95_end_mask_0 = const()[name = string("v_cache_95_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_95_squeeze_mask_0 = const()[name = string("v_cache_95_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_95_cast_fp16 = slice_by_index(begin = v_cache_95_begin_0, end = v_cache_95_end_0, end_mask = v_cache_95_end_mask_0, squeeze_mask = v_cache_95_squeeze_mask_0, x = read_state_3)[name = string("v_cache_95_cast_fp16")]; int32 var_5033 = const()[name = string("op_5033"), val = int32(-1)]; tensor var_5051_axes_0 = const()[name = string("op_5051_axes_0"), val = tensor([-1])]; tensor blocks_23_attn_ln_weight_to_fp16 = const()[name = string("blocks_23_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1194270784)))]; tensor blocks_23_attn_ln_bias_to_fp16 = const()[name = string("blocks_23_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1194273408)))]; fp16 var_5039_to_fp16 = const()[name = string("op_5039_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_5051_cast_fp16 = layer_norm(axes = var_5051_axes_0, beta = blocks_23_attn_ln_bias_to_fp16, epsilon = var_5039_to_fp16, gamma = blocks_23_attn_ln_weight_to_fp16, x = x_417_cast_fp16)[name = string("op_5051_cast_fp16")]; tensor var_5062_to_fp16 = const()[name = string("op_5062_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1194276032)))]; tensor var_5063_to_fp16 = const()[name = string("op_5063_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1197552896)))]; tensor linear_184_cast_fp16 = linear(bias = var_5063_to_fp16, weight = var_5062_to_fp16, x = var_5051_cast_fp16)[name = string("linear_184_cast_fp16")]; tensor var_5066_to_fp16 = const()[name = string("op_5066_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1197555520)))]; tensor linear_185_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_5066_to_fp16, x = var_5051_cast_fp16)[name = string("linear_185_cast_fp16")]; tensor var_5070_to_fp16 = const()[name = string("op_5070_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1200832384)))]; tensor var_5071_to_fp16 = const()[name = string("op_5071_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1204109248)))]; tensor linear_186_cast_fp16 = linear(bias = var_5071_to_fp16, weight = var_5070_to_fp16, x = var_5051_cast_fp16)[name = string("linear_186_cast_fp16")]; tensor var_5073_shape_cast_fp16 = shape(x = linear_184_cast_fp16)[name = string("op_5073_shape_cast_fp16")]; int32 gather_278_axis_0 = const()[name = string("gather_278_axis_0"), val = int32(0)]; int32 gather_278_batch_dims_0 = const()[name = string("gather_278_batch_dims_0"), val = int32(0)]; bool gather_278_validate_indices_0 = const()[name = string("gather_278_validate_indices_0"), val = bool(false)]; string var_5073_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5073_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_278_to_uint16 = const()[name = string("select_278_to_uint16"), val = uint16(1)]; tensor var_5073_shape_cast_fp16_to_uint16 = cast(dtype = var_5073_shape_cast_fp16_to_uint16_dtype_0, x = var_5073_shape_cast_fp16)[name = string("cast_344")]; uint16 gather_278_cast_uint16 = gather(axis = gather_278_axis_0, batch_dims = gather_278_batch_dims_0, indices = select_278_to_uint16, validate_indices = gather_278_validate_indices_0, x = var_5073_shape_cast_fp16_to_uint16)[name = string("gather_278_cast_uint16")]; string gather_278_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_278_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_278_cast_uint16_to_int32 = cast(dtype = gather_278_cast_uint16_to_int32_dtype_0, x = gather_278_cast_uint16)[name = string("cast_343")]; int32 end_step_49 = add(x = offset, y = gather_278_cast_uint16_to_int32)[name = string("end_step_49")]; tensor expand_dims_368 = const()[name = string("expand_dims_368"), val = tensor([0])]; tensor expand_dims_370 = const()[name = string("expand_dims_370"), val = tensor([0])]; tensor expand_dims_371_axes_0 = const()[name = string("expand_dims_371_axes_0"), val = tensor([0])]; tensor expand_dims_371 = expand_dims(axes = expand_dims_371_axes_0, x = end_step_49)[name = string("expand_dims_371")]; tensor concat_510_values0_0 = const()[name = string("concat_510_values0_0"), val = tensor([23])]; int32 concat_510_axis_0 = const()[name = string("concat_510_axis_0"), val = int32(0)]; bool concat_510_interleave_0 = const()[name = string("concat_510_interleave_0"), val = bool(false)]; tensor concat_510 = concat(axis = concat_510_axis_0, interleave = concat_510_interleave_0, values = (concat_510_values0_0, expand_dims_368, expand_dims_1, expand_dims_370))[name = string("concat_510")]; tensor concat_511_values0_0 = const()[name = string("concat_511_values0_0"), val = tensor([0])]; tensor concat_511_values1_0 = const()[name = string("concat_511_values1_0"), val = tensor([0])]; tensor concat_511_values3_0 = const()[name = string("concat_511_values3_0"), val = tensor([0])]; int32 concat_511_axis_0 = const()[name = string("concat_511_axis_0"), val = int32(0)]; bool concat_511_interleave_0 = const()[name = string("concat_511_interleave_0"), val = bool(false)]; tensor concat_511 = concat(axis = concat_511_axis_0, interleave = concat_511_interleave_0, values = (concat_511_values0_0, concat_511_values1_0, expand_dims_371, concat_511_values3_0))[name = string("concat_511")]; tensor k_cache1_internal_tensor_assign_24_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_24_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_24_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_510, begin_mask = k_cache1_internal_tensor_assign_24_begin_mask_0, end = concat_511, end_mask = k_cache1_internal_tensor_assign_24_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_24_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_24_stride_0, update = linear_185_cast_fp16, x = coreml_update_state_108)[name = string("k_cache1_internal_tensor_assign_24_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_24_cast_fp16, input = k_cache1)[name = string("coreml_update_state_110_write_state")]; tensor coreml_update_state_110 = read_state(input = k_cache1)[name = string("coreml_update_state_110")]; tensor v_cache1_internal_tensor_assign_24_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_24_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_24_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_510, begin_mask = v_cache1_internal_tensor_assign_24_begin_mask_0, end = concat_511, end_mask = v_cache1_internal_tensor_assign_24_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_24_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_24_stride_0, update = linear_186_cast_fp16, x = coreml_update_state_109)[name = string("v_cache1_internal_tensor_assign_24_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_24_cast_fp16, input = v_cache1)[name = string("coreml_update_state_111_write_state")]; tensor coreml_update_state_111 = read_state(input = v_cache1)[name = string("coreml_update_state_111")]; int32 concat_516_values0_0 = const()[name = string("concat_516_values0_0"), val = int32(1)]; int32 concat_516_values2_0 = const()[name = string("concat_516_values2_0"), val = int32(1280)]; int32 concat_516_axis_0 = const()[name = string("concat_516_axis_0"), val = int32(0)]; bool concat_516_interleave_0 = const()[name = string("concat_516_interleave_0"), val = bool(false)]; tensor concat_516 = concat(axis = concat_516_axis_0, interleave = concat_516_interleave_0, values = (concat_516_values0_0, end_step_49, concat_516_values2_0))[name = string("concat_516")]; tensor var_5089_begin_0 = const()[name = string("op_5089_begin_0"), val = tensor([0, 0, 0])]; tensor var_5089_end_mask_0 = const()[name = string("op_5089_end_mask_0"), val = tensor([true, false, true])]; tensor var_5089_cast_fp16 = slice_by_index(begin = var_5089_begin_0, end = concat_516, end_mask = var_5089_end_mask_0, x = k_cache_93_cast_fp16)[name = string("op_5089_cast_fp16")]; tensor var_5092_begin_0 = const()[name = string("op_5092_begin_0"), val = tensor([0, 0, 0])]; tensor var_5092_end_mask_0 = const()[name = string("op_5092_end_mask_0"), val = tensor([true, false, true])]; tensor var_5092_cast_fp16 = slice_by_index(begin = var_5092_begin_0, end = concat_516, end_mask = var_5092_end_mask_0, x = v_cache_93_cast_fp16)[name = string("op_5092_cast_fp16")]; tensor concat_518x = const()[name = string("concat_518x"), val = tensor([1, -1, 20, 64])]; tensor var_5102_cast_fp16 = reshape(shape = concat_518x, x = linear_184_cast_fp16)[name = string("op_5102_cast_fp16")]; tensor const_252_to_fp16 = const()[name = string("const_252_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_187_cast_fp16 = mul(x = var_5102_cast_fp16, y = const_252_to_fp16)[name = string("q_187_cast_fp16")]; tensor concat_519x = const()[name = string("concat_519x"), val = tensor([1, -1, 20, 64])]; tensor var_5109_cast_fp16 = reshape(shape = concat_519x, x = var_5089_cast_fp16)[name = string("op_5109_cast_fp16")]; tensor const_253_to_fp16 = const()[name = string("const_253_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_235_cast_fp16 = mul(x = var_5109_cast_fp16, y = const_253_to_fp16)[name = string("k_235_cast_fp16")]; tensor concat_520x = const()[name = string("concat_520x"), val = tensor([1, -1, 20, 64])]; tensor var_5116_cast_fp16 = reshape(shape = concat_520x, x = var_5092_cast_fp16)[name = string("op_5116_cast_fp16")]; tensor var_5117 = const()[name = string("op_5117"), val = tensor([0, 2, 1, 3])]; bool qk_139_transpose_x_0 = const()[name = string("qk_139_transpose_x_0"), val = bool(false)]; bool qk_139_transpose_y_0 = const()[name = string("qk_139_transpose_y_0"), val = bool(false)]; tensor transpose_349_perm_0 = const()[name = string("transpose_349_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_350_perm_0 = const()[name = string("transpose_350_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_350 = transpose(perm = transpose_350_perm_0, x = k_235_cast_fp16)[name = string("transpose_454")]; tensor transpose_349 = transpose(perm = transpose_349_perm_0, x = q_187_cast_fp16)[name = string("transpose_455")]; tensor qk_139_cast_fp16 = matmul(transpose_x = qk_139_transpose_x_0, transpose_y = qk_139_transpose_y_0, x = transpose_349, y = transpose_350)[name = string("qk_139_cast_fp16")]; int32 concat_521_values1_0 = const()[name = string("concat_521_values1_0"), val = int32(448)]; int32 concat_521_axis_0 = const()[name = string("concat_521_axis_0"), val = int32(0)]; bool concat_521_interleave_0 = const()[name = string("concat_521_interleave_0"), val = bool(false)]; tensor concat_521 = concat(axis = concat_521_axis_0, interleave = concat_521_interleave_0, values = (gather_278_cast_uint16_to_int32, concat_521_values1_0))[name = string("concat_521")]; tensor var_5120_begin_0 = const()[name = string("op_5120_begin_0"), val = tensor([0, 0])]; tensor var_5120_end_mask_0 = const()[name = string("op_5120_end_mask_0"), val = tensor([false, true])]; tensor var_5120_cast_fp16 = slice_by_index(begin = var_5120_begin_0, end = concat_521, end_mask = var_5120_end_mask_0, x = mask_to_fp16)[name = string("op_5120_cast_fp16")]; int32 concat_522_values0_0 = const()[name = string("concat_522_values0_0"), val = int32(0)]; int32 concat_522_axis_0 = const()[name = string("concat_522_axis_0"), val = int32(0)]; bool concat_522_interleave_0 = const()[name = string("concat_522_interleave_0"), val = bool(false)]; tensor concat_522 = concat(axis = concat_522_axis_0, interleave = concat_522_interleave_0, values = (concat_522_values0_0, gather_278_cast_uint16_to_int32))[name = string("concat_522")]; tensor var_5121_begin_0 = const()[name = string("op_5121_begin_0"), val = tensor([0, 0])]; tensor var_5121_end_mask_0 = const()[name = string("op_5121_end_mask_0"), val = tensor([true, false])]; tensor var_5121_cast_fp16 = slice_by_index(begin = var_5121_begin_0, end = concat_522, end_mask = var_5121_end_mask_0, x = var_5120_cast_fp16)[name = string("op_5121_cast_fp16")]; tensor qk_141_cast_fp16 = add(x = qk_139_cast_fp16, y = var_5121_cast_fp16)[name = string("qk_141_cast_fp16")]; tensor var_5124_cast_fp16 = softmax(axis = var_5033, x = qk_141_cast_fp16)[name = string("op_5124_cast_fp16")]; bool var_5126_transpose_x_0 = const()[name = string("op_5126_transpose_x_0"), val = bool(false)]; bool var_5126_transpose_y_0 = const()[name = string("op_5126_transpose_y_0"), val = bool(false)]; tensor v_235_cast_fp16 = transpose(perm = var_5117, x = var_5116_cast_fp16)[name = string("transpose_456")]; tensor var_5126_cast_fp16 = matmul(transpose_x = var_5126_transpose_x_0, transpose_y = var_5126_transpose_y_0, x = var_5124_cast_fp16, y = v_235_cast_fp16)[name = string("op_5126_cast_fp16")]; tensor var_5127 = const()[name = string("op_5127"), val = tensor([0, 2, 1, 3])]; tensor concat_523x = const()[name = string("concat_523x"), val = tensor([1, -1, 1280])]; tensor var_5128_cast_fp16 = transpose(perm = var_5127, x = var_5126_cast_fp16)[name = string("transpose_453")]; tensor x_421_cast_fp16 = reshape(shape = concat_523x, x = var_5128_cast_fp16)[name = string("x_421_cast_fp16")]; tensor var_5132_to_fp16 = const()[name = string("op_5132_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1204111872)))]; tensor var_5133_to_fp16 = const()[name = string("op_5133_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1207388736)))]; tensor linear_187_cast_fp16 = linear(bias = var_5133_to_fp16, weight = var_5132_to_fp16, x = x_421_cast_fp16)[name = string("linear_187_cast_fp16")]; tensor x_423_cast_fp16 = add(x = x_417_cast_fp16, y = linear_187_cast_fp16)[name = string("x_423_cast_fp16")]; tensor var_5140_axes_0 = const()[name = string("op_5140_axes_0"), val = tensor([-1])]; tensor blocks_23_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_23_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1207391360)))]; tensor blocks_23_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_23_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1207393984)))]; tensor var_5140_cast_fp16 = layer_norm(axes = var_5140_axes_0, beta = blocks_23_cross_attn_ln_bias_to_fp16, epsilon = var_5039_to_fp16, gamma = blocks_23_cross_attn_ln_weight_to_fp16, x = x_423_cast_fp16)[name = string("op_5140_cast_fp16")]; tensor var_5149_to_fp16 = const()[name = string("op_5149_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1207396608)))]; tensor var_5150_to_fp16 = const()[name = string("op_5150_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1210673472)))]; tensor linear_188_cast_fp16 = linear(bias = var_5150_to_fp16, weight = var_5149_to_fp16, x = var_5140_cast_fp16)[name = string("linear_188_cast_fp16")]; tensor concat_524 = const()[name = string("concat_524"), val = tensor([0, 0, 0])]; tensor concat_525 = const()[name = string("concat_525"), val = tensor([0, 1500, 0])]; tensor k_237_internal_tensor_assign_1_stride_0 = const()[name = string("k_237_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_237_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_237_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_237_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_237_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_237_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_237_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_237_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_524, begin_mask = k_237_internal_tensor_assign_1_begin_mask_0, end = concat_525, end_mask = k_237_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_237_internal_tensor_assign_1_squeeze_mask_0, stride = k_237_internal_tensor_assign_1_stride_0, update = k_cache_95_cast_fp16, x = k_7_to_fp16)[name = string("k_237_internal_tensor_assign_1_cast_fp16")]; tensor concat_526 = const()[name = string("concat_526"), val = tensor([0, 0, 0])]; tensor concat_527 = const()[name = string("concat_527"), val = tensor([0, 1500, 0])]; tensor v_237_internal_tensor_assign_1_stride_0 = const()[name = string("v_237_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_237_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_237_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_237_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_237_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_237_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_237_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_237_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_526, begin_mask = v_237_internal_tensor_assign_1_begin_mask_0, end = concat_527, end_mask = v_237_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_237_internal_tensor_assign_1_squeeze_mask_0, stride = v_237_internal_tensor_assign_1_stride_0, update = v_cache_95_cast_fp16, x = k_7_to_fp16)[name = string("v_237_internal_tensor_assign_1_cast_fp16")]; tensor concat_528x = const()[name = string("concat_528x"), val = tensor([1, -1, 20, 64])]; tensor var_5170_cast_fp16 = reshape(shape = concat_528x, x = linear_188_cast_fp16)[name = string("op_5170_cast_fp16")]; tensor const_254_to_fp16 = const()[name = string("const_254_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_191_cast_fp16 = mul(x = var_5170_cast_fp16, y = const_254_to_fp16)[name = string("q_191_cast_fp16")]; tensor var_5176 = const()[name = string("op_5176"), val = tensor([1, 1500, 20, -1])]; tensor var_5177_cast_fp16 = reshape(shape = var_5176, x = k_237_internal_tensor_assign_1_cast_fp16)[name = string("op_5177_cast_fp16")]; tensor const_255_to_fp16 = const()[name = string("const_255_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_239_cast_fp16 = mul(x = var_5177_cast_fp16, y = const_255_to_fp16)[name = string("k_239_cast_fp16")]; tensor var_5183 = const()[name = string("op_5183"), val = tensor([1, 1500, 20, -1])]; tensor var_5184_cast_fp16 = reshape(shape = var_5183, x = v_237_internal_tensor_assign_1_cast_fp16)[name = string("op_5184_cast_fp16")]; tensor var_5185 = const()[name = string("op_5185"), val = tensor([0, 2, 1, 3])]; bool qk_143_transpose_x_0 = const()[name = string("qk_143_transpose_x_0"), val = bool(false)]; bool qk_143_transpose_y_0 = const()[name = string("qk_143_transpose_y_0"), val = bool(false)]; tensor transpose_351_perm_0 = const()[name = string("transpose_351_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_352_perm_0 = const()[name = string("transpose_352_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_352 = transpose(perm = transpose_352_perm_0, x = k_239_cast_fp16)[name = string("transpose_450")]; tensor transpose_351 = transpose(perm = transpose_351_perm_0, x = q_191_cast_fp16)[name = string("transpose_451")]; tensor qk_143_cast_fp16 = matmul(transpose_x = qk_143_transpose_x_0, transpose_y = qk_143_transpose_y_0, x = transpose_351, y = transpose_352)[name = string("qk_143_cast_fp16")]; tensor var_5189_cast_fp16 = softmax(axis = var_5033, x = qk_143_cast_fp16)[name = string("op_5189_cast_fp16")]; bool var_5191_transpose_x_0 = const()[name = string("op_5191_transpose_x_0"), val = bool(false)]; bool var_5191_transpose_y_0 = const()[name = string("op_5191_transpose_y_0"), val = bool(false)]; tensor v_239_cast_fp16 = transpose(perm = var_5185, x = var_5184_cast_fp16)[name = string("transpose_452")]; tensor var_5191_cast_fp16 = matmul(transpose_x = var_5191_transpose_x_0, transpose_y = var_5191_transpose_y_0, x = var_5189_cast_fp16, y = v_239_cast_fp16)[name = string("op_5191_cast_fp16")]; tensor var_5192 = const()[name = string("op_5192"), val = tensor([0, 2, 1, 3])]; tensor concat_529x = const()[name = string("concat_529x"), val = tensor([1, -1, 1280])]; tensor var_5193_cast_fp16 = transpose(perm = var_5192, x = var_5191_cast_fp16)[name = string("transpose_449")]; tensor x_427_cast_fp16 = reshape(shape = concat_529x, x = var_5193_cast_fp16)[name = string("x_427_cast_fp16")]; tensor var_5197_to_fp16 = const()[name = string("op_5197_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1210676096)))]; tensor var_5198_to_fp16 = const()[name = string("op_5198_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1213952960)))]; tensor linear_189_cast_fp16 = linear(bias = var_5198_to_fp16, weight = var_5197_to_fp16, x = x_427_cast_fp16)[name = string("linear_189_cast_fp16")]; tensor x_429_cast_fp16 = add(x = x_423_cast_fp16, y = linear_189_cast_fp16)[name = string("x_429_cast_fp16")]; tensor var_5205_axes_0 = const()[name = string("op_5205_axes_0"), val = tensor([-1])]; tensor blocks_23_mlp_ln_weight_to_fp16 = const()[name = string("blocks_23_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1213955584)))]; tensor blocks_23_mlp_ln_bias_to_fp16 = const()[name = string("blocks_23_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1213958208)))]; tensor var_5205_cast_fp16 = layer_norm(axes = var_5205_axes_0, beta = blocks_23_mlp_ln_bias_to_fp16, epsilon = var_5039_to_fp16, gamma = blocks_23_mlp_ln_weight_to_fp16, x = x_429_cast_fp16)[name = string("op_5205_cast_fp16")]; tensor var_5214_to_fp16 = const()[name = string("op_5214_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1213960832)))]; tensor var_5215_to_fp16 = const()[name = string("op_5215_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1227068096)))]; tensor linear_190_cast_fp16 = linear(bias = var_5215_to_fp16, weight = var_5214_to_fp16, x = var_5205_cast_fp16)[name = string("linear_190_cast_fp16")]; string x_433_mode_0 = const()[name = string("x_433_mode_0"), val = string("EXACT")]; tensor x_433_cast_fp16 = gelu(mode = x_433_mode_0, x = linear_190_cast_fp16)[name = string("x_433_cast_fp16")]; tensor var_5220_to_fp16 = const()[name = string("op_5220_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1227078400)))]; tensor var_5221_to_fp16 = const()[name = string("op_5221_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1240185664)))]; tensor linear_191_cast_fp16 = linear(bias = var_5221_to_fp16, weight = var_5220_to_fp16, x = x_433_cast_fp16)[name = string("linear_191_cast_fp16")]; tensor x_435_cast_fp16 = add(x = x_429_cast_fp16, y = linear_191_cast_fp16)[name = string("x_435_cast_fp16")]; tensor k_cache_97_begin_0 = const()[name = string("k_cache_97_begin_0"), val = tensor([24, 0, 0, 0])]; tensor k_cache_97_end_0 = const()[name = string("k_cache_97_end_0"), val = tensor([25, 1, 448, 1280])]; tensor k_cache_97_end_mask_0 = const()[name = string("k_cache_97_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_97_squeeze_mask_0 = const()[name = string("k_cache_97_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_97_cast_fp16 = slice_by_index(begin = k_cache_97_begin_0, end = k_cache_97_end_0, end_mask = k_cache_97_end_mask_0, squeeze_mask = k_cache_97_squeeze_mask_0, x = coreml_update_state_110)[name = string("k_cache_97_cast_fp16")]; tensor v_cache_97_begin_0 = const()[name = string("v_cache_97_begin_0"), val = tensor([24, 0, 0, 0])]; tensor v_cache_97_end_0 = const()[name = string("v_cache_97_end_0"), val = tensor([25, 1, 448, 1280])]; tensor v_cache_97_end_mask_0 = const()[name = string("v_cache_97_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_97_squeeze_mask_0 = const()[name = string("v_cache_97_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_97_cast_fp16 = slice_by_index(begin = v_cache_97_begin_0, end = v_cache_97_end_0, end_mask = v_cache_97_end_mask_0, squeeze_mask = v_cache_97_squeeze_mask_0, x = coreml_update_state_111)[name = string("v_cache_97_cast_fp16")]; tensor k_cache_99_begin_0 = const()[name = string("k_cache_99_begin_0"), val = tensor([24, 0, 0, 0])]; tensor k_cache_99_end_0 = const()[name = string("k_cache_99_end_0"), val = tensor([25, 1, 1500, 1280])]; tensor k_cache_99_end_mask_0 = const()[name = string("k_cache_99_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_99_squeeze_mask_0 = const()[name = string("k_cache_99_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_99_cast_fp16 = slice_by_index(begin = k_cache_99_begin_0, end = k_cache_99_end_0, end_mask = k_cache_99_end_mask_0, squeeze_mask = k_cache_99_squeeze_mask_0, x = read_state_2)[name = string("k_cache_99_cast_fp16")]; tensor v_cache_99_begin_0 = const()[name = string("v_cache_99_begin_0"), val = tensor([24, 0, 0, 0])]; tensor v_cache_99_end_0 = const()[name = string("v_cache_99_end_0"), val = tensor([25, 1, 1500, 1280])]; tensor v_cache_99_end_mask_0 = const()[name = string("v_cache_99_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_99_squeeze_mask_0 = const()[name = string("v_cache_99_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_99_cast_fp16 = slice_by_index(begin = v_cache_99_begin_0, end = v_cache_99_end_0, end_mask = v_cache_99_end_mask_0, squeeze_mask = v_cache_99_squeeze_mask_0, x = read_state_3)[name = string("v_cache_99_cast_fp16")]; int32 var_5244 = const()[name = string("op_5244"), val = int32(-1)]; tensor var_5262_axes_0 = const()[name = string("op_5262_axes_0"), val = tensor([-1])]; tensor blocks_24_attn_ln_weight_to_fp16 = const()[name = string("blocks_24_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1240188288)))]; tensor blocks_24_attn_ln_bias_to_fp16 = const()[name = string("blocks_24_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1240190912)))]; fp16 var_5250_to_fp16 = const()[name = string("op_5250_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_5262_cast_fp16 = layer_norm(axes = var_5262_axes_0, beta = blocks_24_attn_ln_bias_to_fp16, epsilon = var_5250_to_fp16, gamma = blocks_24_attn_ln_weight_to_fp16, x = x_435_cast_fp16)[name = string("op_5262_cast_fp16")]; tensor var_5273_to_fp16 = const()[name = string("op_5273_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1240193536)))]; tensor var_5274_to_fp16 = const()[name = string("op_5274_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1243470400)))]; tensor linear_192_cast_fp16 = linear(bias = var_5274_to_fp16, weight = var_5273_to_fp16, x = var_5262_cast_fp16)[name = string("linear_192_cast_fp16")]; tensor var_5277_to_fp16 = const()[name = string("op_5277_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1243473024)))]; tensor linear_193_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_5277_to_fp16, x = var_5262_cast_fp16)[name = string("linear_193_cast_fp16")]; tensor var_5281_to_fp16 = const()[name = string("op_5281_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1246749888)))]; tensor var_5282_to_fp16 = const()[name = string("op_5282_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1250026752)))]; tensor linear_194_cast_fp16 = linear(bias = var_5282_to_fp16, weight = var_5281_to_fp16, x = var_5262_cast_fp16)[name = string("linear_194_cast_fp16")]; tensor var_5284_shape_cast_fp16 = shape(x = linear_192_cast_fp16)[name = string("op_5284_shape_cast_fp16")]; int32 gather_290_axis_0 = const()[name = string("gather_290_axis_0"), val = int32(0)]; int32 gather_290_batch_dims_0 = const()[name = string("gather_290_batch_dims_0"), val = int32(0)]; bool gather_290_validate_indices_0 = const()[name = string("gather_290_validate_indices_0"), val = bool(false)]; string var_5284_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5284_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_290_to_uint16 = const()[name = string("select_290_to_uint16"), val = uint16(1)]; tensor var_5284_shape_cast_fp16_to_uint16 = cast(dtype = var_5284_shape_cast_fp16_to_uint16_dtype_0, x = var_5284_shape_cast_fp16)[name = string("cast_342")]; uint16 gather_290_cast_uint16 = gather(axis = gather_290_axis_0, batch_dims = gather_290_batch_dims_0, indices = select_290_to_uint16, validate_indices = gather_290_validate_indices_0, x = var_5284_shape_cast_fp16_to_uint16)[name = string("gather_290_cast_uint16")]; string gather_290_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_290_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_290_cast_uint16_to_int32 = cast(dtype = gather_290_cast_uint16_to_int32_dtype_0, x = gather_290_cast_uint16)[name = string("cast_341")]; int32 end_step_51 = add(x = offset, y = gather_290_cast_uint16_to_int32)[name = string("end_step_51")]; tensor expand_dims_384 = const()[name = string("expand_dims_384"), val = tensor([0])]; tensor expand_dims_386 = const()[name = string("expand_dims_386"), val = tensor([0])]; tensor expand_dims_387_axes_0 = const()[name = string("expand_dims_387_axes_0"), val = tensor([0])]; tensor expand_dims_387 = expand_dims(axes = expand_dims_387_axes_0, x = end_step_51)[name = string("expand_dims_387")]; tensor concat_532_values0_0 = const()[name = string("concat_532_values0_0"), val = tensor([24])]; int32 concat_532_axis_0 = const()[name = string("concat_532_axis_0"), val = int32(0)]; bool concat_532_interleave_0 = const()[name = string("concat_532_interleave_0"), val = bool(false)]; tensor concat_532 = concat(axis = concat_532_axis_0, interleave = concat_532_interleave_0, values = (concat_532_values0_0, expand_dims_384, expand_dims_1, expand_dims_386))[name = string("concat_532")]; tensor concat_533_values0_0 = const()[name = string("concat_533_values0_0"), val = tensor([0])]; tensor concat_533_values1_0 = const()[name = string("concat_533_values1_0"), val = tensor([0])]; tensor concat_533_values3_0 = const()[name = string("concat_533_values3_0"), val = tensor([0])]; int32 concat_533_axis_0 = const()[name = string("concat_533_axis_0"), val = int32(0)]; bool concat_533_interleave_0 = const()[name = string("concat_533_interleave_0"), val = bool(false)]; tensor concat_533 = concat(axis = concat_533_axis_0, interleave = concat_533_interleave_0, values = (concat_533_values0_0, concat_533_values1_0, expand_dims_387, concat_533_values3_0))[name = string("concat_533")]; tensor k_cache1_internal_tensor_assign_25_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_25_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_25_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_25_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_25_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_25_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_25_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_25_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_25_cast_fp16 = slice_update(begin = concat_532, begin_mask = k_cache1_internal_tensor_assign_25_begin_mask_0, end = concat_533, end_mask = k_cache1_internal_tensor_assign_25_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_25_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_25_stride_0, update = linear_193_cast_fp16, x = coreml_update_state_110)[name = string("k_cache1_internal_tensor_assign_25_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_25_cast_fp16, input = k_cache1)[name = string("coreml_update_state_112_write_state")]; tensor coreml_update_state_112 = read_state(input = k_cache1)[name = string("coreml_update_state_112")]; tensor v_cache1_internal_tensor_assign_25_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_25_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_25_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_25_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_25_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_25_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_25_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_25_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_25_cast_fp16 = slice_update(begin = concat_532, begin_mask = v_cache1_internal_tensor_assign_25_begin_mask_0, end = concat_533, end_mask = v_cache1_internal_tensor_assign_25_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_25_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_25_stride_0, update = linear_194_cast_fp16, x = coreml_update_state_111)[name = string("v_cache1_internal_tensor_assign_25_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_25_cast_fp16, input = v_cache1)[name = string("coreml_update_state_113_write_state")]; tensor coreml_update_state_113 = read_state(input = v_cache1)[name = string("coreml_update_state_113")]; int32 concat_538_values0_0 = const()[name = string("concat_538_values0_0"), val = int32(1)]; int32 concat_538_values2_0 = const()[name = string("concat_538_values2_0"), val = int32(1280)]; int32 concat_538_axis_0 = const()[name = string("concat_538_axis_0"), val = int32(0)]; bool concat_538_interleave_0 = const()[name = string("concat_538_interleave_0"), val = bool(false)]; tensor concat_538 = concat(axis = concat_538_axis_0, interleave = concat_538_interleave_0, values = (concat_538_values0_0, end_step_51, concat_538_values2_0))[name = string("concat_538")]; tensor var_5300_begin_0 = const()[name = string("op_5300_begin_0"), val = tensor([0, 0, 0])]; tensor var_5300_end_mask_0 = const()[name = string("op_5300_end_mask_0"), val = tensor([true, false, true])]; tensor var_5300_cast_fp16 = slice_by_index(begin = var_5300_begin_0, end = concat_538, end_mask = var_5300_end_mask_0, x = k_cache_97_cast_fp16)[name = string("op_5300_cast_fp16")]; tensor var_5303_begin_0 = const()[name = string("op_5303_begin_0"), val = tensor([0, 0, 0])]; tensor var_5303_end_mask_0 = const()[name = string("op_5303_end_mask_0"), val = tensor([true, false, true])]; tensor var_5303_cast_fp16 = slice_by_index(begin = var_5303_begin_0, end = concat_538, end_mask = var_5303_end_mask_0, x = v_cache_97_cast_fp16)[name = string("op_5303_cast_fp16")]; tensor concat_540x = const()[name = string("concat_540x"), val = tensor([1, -1, 20, 64])]; tensor var_5313_cast_fp16 = reshape(shape = concat_540x, x = linear_192_cast_fp16)[name = string("op_5313_cast_fp16")]; tensor const_256_to_fp16 = const()[name = string("const_256_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_195_cast_fp16 = mul(x = var_5313_cast_fp16, y = const_256_to_fp16)[name = string("q_195_cast_fp16")]; tensor concat_541x = const()[name = string("concat_541x"), val = tensor([1, -1, 20, 64])]; tensor var_5320_cast_fp16 = reshape(shape = concat_541x, x = var_5300_cast_fp16)[name = string("op_5320_cast_fp16")]; tensor const_257_to_fp16 = const()[name = string("const_257_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_245_cast_fp16 = mul(x = var_5320_cast_fp16, y = const_257_to_fp16)[name = string("k_245_cast_fp16")]; tensor concat_542x = const()[name = string("concat_542x"), val = tensor([1, -1, 20, 64])]; tensor var_5327_cast_fp16 = reshape(shape = concat_542x, x = var_5303_cast_fp16)[name = string("op_5327_cast_fp16")]; tensor var_5328 = const()[name = string("op_5328"), val = tensor([0, 2, 1, 3])]; bool qk_145_transpose_x_0 = const()[name = string("qk_145_transpose_x_0"), val = bool(false)]; bool qk_145_transpose_y_0 = const()[name = string("qk_145_transpose_y_0"), val = bool(false)]; tensor transpose_353_perm_0 = const()[name = string("transpose_353_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_354_perm_0 = const()[name = string("transpose_354_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_354 = transpose(perm = transpose_354_perm_0, x = k_245_cast_fp16)[name = string("transpose_446")]; tensor transpose_353 = transpose(perm = transpose_353_perm_0, x = q_195_cast_fp16)[name = string("transpose_447")]; tensor qk_145_cast_fp16 = matmul(transpose_x = qk_145_transpose_x_0, transpose_y = qk_145_transpose_y_0, x = transpose_353, y = transpose_354)[name = string("qk_145_cast_fp16")]; int32 concat_543_values1_0 = const()[name = string("concat_543_values1_0"), val = int32(448)]; int32 concat_543_axis_0 = const()[name = string("concat_543_axis_0"), val = int32(0)]; bool concat_543_interleave_0 = const()[name = string("concat_543_interleave_0"), val = bool(false)]; tensor concat_543 = concat(axis = concat_543_axis_0, interleave = concat_543_interleave_0, values = (gather_290_cast_uint16_to_int32, concat_543_values1_0))[name = string("concat_543")]; tensor var_5331_begin_0 = const()[name = string("op_5331_begin_0"), val = tensor([0, 0])]; tensor var_5331_end_mask_0 = const()[name = string("op_5331_end_mask_0"), val = tensor([false, true])]; tensor var_5331_cast_fp16 = slice_by_index(begin = var_5331_begin_0, end = concat_543, end_mask = var_5331_end_mask_0, x = mask_to_fp16)[name = string("op_5331_cast_fp16")]; int32 concat_544_values0_0 = const()[name = string("concat_544_values0_0"), val = int32(0)]; int32 concat_544_axis_0 = const()[name = string("concat_544_axis_0"), val = int32(0)]; bool concat_544_interleave_0 = const()[name = string("concat_544_interleave_0"), val = bool(false)]; tensor concat_544 = concat(axis = concat_544_axis_0, interleave = concat_544_interleave_0, values = (concat_544_values0_0, gather_290_cast_uint16_to_int32))[name = string("concat_544")]; tensor var_5332_begin_0 = const()[name = string("op_5332_begin_0"), val = tensor([0, 0])]; tensor var_5332_end_mask_0 = const()[name = string("op_5332_end_mask_0"), val = tensor([true, false])]; tensor var_5332_cast_fp16 = slice_by_index(begin = var_5332_begin_0, end = concat_544, end_mask = var_5332_end_mask_0, x = var_5331_cast_fp16)[name = string("op_5332_cast_fp16")]; tensor qk_147_cast_fp16 = add(x = qk_145_cast_fp16, y = var_5332_cast_fp16)[name = string("qk_147_cast_fp16")]; tensor var_5335_cast_fp16 = softmax(axis = var_5244, x = qk_147_cast_fp16)[name = string("op_5335_cast_fp16")]; bool var_5337_transpose_x_0 = const()[name = string("op_5337_transpose_x_0"), val = bool(false)]; bool var_5337_transpose_y_0 = const()[name = string("op_5337_transpose_y_0"), val = bool(false)]; tensor v_245_cast_fp16 = transpose(perm = var_5328, x = var_5327_cast_fp16)[name = string("transpose_448")]; tensor var_5337_cast_fp16 = matmul(transpose_x = var_5337_transpose_x_0, transpose_y = var_5337_transpose_y_0, x = var_5335_cast_fp16, y = v_245_cast_fp16)[name = string("op_5337_cast_fp16")]; tensor var_5338 = const()[name = string("op_5338"), val = tensor([0, 2, 1, 3])]; tensor concat_545x = const()[name = string("concat_545x"), val = tensor([1, -1, 1280])]; tensor var_5339_cast_fp16 = transpose(perm = var_5338, x = var_5337_cast_fp16)[name = string("transpose_445")]; tensor x_439_cast_fp16 = reshape(shape = concat_545x, x = var_5339_cast_fp16)[name = string("x_439_cast_fp16")]; tensor var_5343_to_fp16 = const()[name = string("op_5343_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1250029376)))]; tensor var_5344_to_fp16 = const()[name = string("op_5344_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1253306240)))]; tensor linear_195_cast_fp16 = linear(bias = var_5344_to_fp16, weight = var_5343_to_fp16, x = x_439_cast_fp16)[name = string("linear_195_cast_fp16")]; tensor x_441_cast_fp16 = add(x = x_435_cast_fp16, y = linear_195_cast_fp16)[name = string("x_441_cast_fp16")]; tensor var_5351_axes_0 = const()[name = string("op_5351_axes_0"), val = tensor([-1])]; tensor blocks_24_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_24_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1253308864)))]; tensor blocks_24_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_24_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1253311488)))]; tensor var_5351_cast_fp16 = layer_norm(axes = var_5351_axes_0, beta = blocks_24_cross_attn_ln_bias_to_fp16, epsilon = var_5250_to_fp16, gamma = blocks_24_cross_attn_ln_weight_to_fp16, x = x_441_cast_fp16)[name = string("op_5351_cast_fp16")]; tensor var_5360_to_fp16 = const()[name = string("op_5360_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1253314112)))]; tensor var_5361_to_fp16 = const()[name = string("op_5361_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1256590976)))]; tensor linear_196_cast_fp16 = linear(bias = var_5361_to_fp16, weight = var_5360_to_fp16, x = var_5351_cast_fp16)[name = string("linear_196_cast_fp16")]; tensor concat_546 = const()[name = string("concat_546"), val = tensor([0, 0, 0])]; tensor concat_547 = const()[name = string("concat_547"), val = tensor([0, 1500, 0])]; tensor k_247_internal_tensor_assign_1_stride_0 = const()[name = string("k_247_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_247_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_247_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_247_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_247_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_247_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_247_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_247_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_546, begin_mask = k_247_internal_tensor_assign_1_begin_mask_0, end = concat_547, end_mask = k_247_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_247_internal_tensor_assign_1_squeeze_mask_0, stride = k_247_internal_tensor_assign_1_stride_0, update = k_cache_99_cast_fp16, x = k_7_to_fp16)[name = string("k_247_internal_tensor_assign_1_cast_fp16")]; tensor concat_548 = const()[name = string("concat_548"), val = tensor([0, 0, 0])]; tensor concat_549 = const()[name = string("concat_549"), val = tensor([0, 1500, 0])]; tensor v_247_internal_tensor_assign_1_stride_0 = const()[name = string("v_247_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_247_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_247_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_247_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_247_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_247_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_247_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_247_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_548, begin_mask = v_247_internal_tensor_assign_1_begin_mask_0, end = concat_549, end_mask = v_247_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_247_internal_tensor_assign_1_squeeze_mask_0, stride = v_247_internal_tensor_assign_1_stride_0, update = v_cache_99_cast_fp16, x = k_7_to_fp16)[name = string("v_247_internal_tensor_assign_1_cast_fp16")]; tensor concat_550x = const()[name = string("concat_550x"), val = tensor([1, -1, 20, 64])]; tensor var_5381_cast_fp16 = reshape(shape = concat_550x, x = linear_196_cast_fp16)[name = string("op_5381_cast_fp16")]; tensor const_258_to_fp16 = const()[name = string("const_258_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_199_cast_fp16 = mul(x = var_5381_cast_fp16, y = const_258_to_fp16)[name = string("q_199_cast_fp16")]; tensor var_5387 = const()[name = string("op_5387"), val = tensor([1, 1500, 20, -1])]; tensor var_5388_cast_fp16 = reshape(shape = var_5387, x = k_247_internal_tensor_assign_1_cast_fp16)[name = string("op_5388_cast_fp16")]; tensor const_259_to_fp16 = const()[name = string("const_259_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_249_cast_fp16 = mul(x = var_5388_cast_fp16, y = const_259_to_fp16)[name = string("k_249_cast_fp16")]; tensor var_5394 = const()[name = string("op_5394"), val = tensor([1, 1500, 20, -1])]; tensor var_5395_cast_fp16 = reshape(shape = var_5394, x = v_247_internal_tensor_assign_1_cast_fp16)[name = string("op_5395_cast_fp16")]; tensor var_5396 = const()[name = string("op_5396"), val = tensor([0, 2, 1, 3])]; bool qk_149_transpose_x_0 = const()[name = string("qk_149_transpose_x_0"), val = bool(false)]; bool qk_149_transpose_y_0 = const()[name = string("qk_149_transpose_y_0"), val = bool(false)]; tensor transpose_355_perm_0 = const()[name = string("transpose_355_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_356_perm_0 = const()[name = string("transpose_356_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_356 = transpose(perm = transpose_356_perm_0, x = k_249_cast_fp16)[name = string("transpose_442")]; tensor transpose_355 = transpose(perm = transpose_355_perm_0, x = q_199_cast_fp16)[name = string("transpose_443")]; tensor qk_149_cast_fp16 = matmul(transpose_x = qk_149_transpose_x_0, transpose_y = qk_149_transpose_y_0, x = transpose_355, y = transpose_356)[name = string("qk_149_cast_fp16")]; tensor var_5400_cast_fp16 = softmax(axis = var_5244, x = qk_149_cast_fp16)[name = string("op_5400_cast_fp16")]; bool var_5402_transpose_x_0 = const()[name = string("op_5402_transpose_x_0"), val = bool(false)]; bool var_5402_transpose_y_0 = const()[name = string("op_5402_transpose_y_0"), val = bool(false)]; tensor v_249_cast_fp16 = transpose(perm = var_5396, x = var_5395_cast_fp16)[name = string("transpose_444")]; tensor var_5402_cast_fp16 = matmul(transpose_x = var_5402_transpose_x_0, transpose_y = var_5402_transpose_y_0, x = var_5400_cast_fp16, y = v_249_cast_fp16)[name = string("op_5402_cast_fp16")]; tensor var_5403 = const()[name = string("op_5403"), val = tensor([0, 2, 1, 3])]; tensor concat_551x = const()[name = string("concat_551x"), val = tensor([1, -1, 1280])]; tensor var_5404_cast_fp16 = transpose(perm = var_5403, x = var_5402_cast_fp16)[name = string("transpose_441")]; tensor x_445_cast_fp16 = reshape(shape = concat_551x, x = var_5404_cast_fp16)[name = string("x_445_cast_fp16")]; tensor var_5408_to_fp16 = const()[name = string("op_5408_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1256593600)))]; tensor var_5409_to_fp16 = const()[name = string("op_5409_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1259870464)))]; tensor linear_197_cast_fp16 = linear(bias = var_5409_to_fp16, weight = var_5408_to_fp16, x = x_445_cast_fp16)[name = string("linear_197_cast_fp16")]; tensor x_447_cast_fp16 = add(x = x_441_cast_fp16, y = linear_197_cast_fp16)[name = string("x_447_cast_fp16")]; tensor var_5416_axes_0 = const()[name = string("op_5416_axes_0"), val = tensor([-1])]; tensor blocks_24_mlp_ln_weight_to_fp16 = const()[name = string("blocks_24_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1259873088)))]; tensor blocks_24_mlp_ln_bias_to_fp16 = const()[name = string("blocks_24_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1259875712)))]; tensor var_5416_cast_fp16 = layer_norm(axes = var_5416_axes_0, beta = blocks_24_mlp_ln_bias_to_fp16, epsilon = var_5250_to_fp16, gamma = blocks_24_mlp_ln_weight_to_fp16, x = x_447_cast_fp16)[name = string("op_5416_cast_fp16")]; tensor var_5425_to_fp16 = const()[name = string("op_5425_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1259878336)))]; tensor var_5426_to_fp16 = const()[name = string("op_5426_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1272985600)))]; tensor linear_198_cast_fp16 = linear(bias = var_5426_to_fp16, weight = var_5425_to_fp16, x = var_5416_cast_fp16)[name = string("linear_198_cast_fp16")]; string x_451_mode_0 = const()[name = string("x_451_mode_0"), val = string("EXACT")]; tensor x_451_cast_fp16 = gelu(mode = x_451_mode_0, x = linear_198_cast_fp16)[name = string("x_451_cast_fp16")]; tensor var_5431_to_fp16 = const()[name = string("op_5431_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1272995904)))]; tensor var_5432_to_fp16 = const()[name = string("op_5432_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1286103168)))]; tensor linear_199_cast_fp16 = linear(bias = var_5432_to_fp16, weight = var_5431_to_fp16, x = x_451_cast_fp16)[name = string("linear_199_cast_fp16")]; tensor x_453_cast_fp16 = add(x = x_447_cast_fp16, y = linear_199_cast_fp16)[name = string("x_453_cast_fp16")]; tensor k_cache_101_begin_0 = const()[name = string("k_cache_101_begin_0"), val = tensor([25, 0, 0, 0])]; tensor k_cache_101_end_0 = const()[name = string("k_cache_101_end_0"), val = tensor([26, 1, 448, 1280])]; tensor k_cache_101_end_mask_0 = const()[name = string("k_cache_101_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_101_squeeze_mask_0 = const()[name = string("k_cache_101_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_101_cast_fp16 = slice_by_index(begin = k_cache_101_begin_0, end = k_cache_101_end_0, end_mask = k_cache_101_end_mask_0, squeeze_mask = k_cache_101_squeeze_mask_0, x = coreml_update_state_112)[name = string("k_cache_101_cast_fp16")]; tensor v_cache_101_begin_0 = const()[name = string("v_cache_101_begin_0"), val = tensor([25, 0, 0, 0])]; tensor v_cache_101_end_0 = const()[name = string("v_cache_101_end_0"), val = tensor([26, 1, 448, 1280])]; tensor v_cache_101_end_mask_0 = const()[name = string("v_cache_101_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_101_squeeze_mask_0 = const()[name = string("v_cache_101_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_101_cast_fp16 = slice_by_index(begin = v_cache_101_begin_0, end = v_cache_101_end_0, end_mask = v_cache_101_end_mask_0, squeeze_mask = v_cache_101_squeeze_mask_0, x = coreml_update_state_113)[name = string("v_cache_101_cast_fp16")]; tensor k_cache_103_begin_0 = const()[name = string("k_cache_103_begin_0"), val = tensor([25, 0, 0, 0])]; tensor k_cache_103_end_0 = const()[name = string("k_cache_103_end_0"), val = tensor([26, 1, 1500, 1280])]; tensor k_cache_103_end_mask_0 = const()[name = string("k_cache_103_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_103_squeeze_mask_0 = const()[name = string("k_cache_103_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_103_cast_fp16 = slice_by_index(begin = k_cache_103_begin_0, end = k_cache_103_end_0, end_mask = k_cache_103_end_mask_0, squeeze_mask = k_cache_103_squeeze_mask_0, x = read_state_2)[name = string("k_cache_103_cast_fp16")]; tensor v_cache_103_begin_0 = const()[name = string("v_cache_103_begin_0"), val = tensor([25, 0, 0, 0])]; tensor v_cache_103_end_0 = const()[name = string("v_cache_103_end_0"), val = tensor([26, 1, 1500, 1280])]; tensor v_cache_103_end_mask_0 = const()[name = string("v_cache_103_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_103_squeeze_mask_0 = const()[name = string("v_cache_103_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_103_cast_fp16 = slice_by_index(begin = v_cache_103_begin_0, end = v_cache_103_end_0, end_mask = v_cache_103_end_mask_0, squeeze_mask = v_cache_103_squeeze_mask_0, x = read_state_3)[name = string("v_cache_103_cast_fp16")]; int32 var_5455 = const()[name = string("op_5455"), val = int32(-1)]; tensor var_5473_axes_0 = const()[name = string("op_5473_axes_0"), val = tensor([-1])]; tensor blocks_25_attn_ln_weight_to_fp16 = const()[name = string("blocks_25_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1286105792)))]; tensor blocks_25_attn_ln_bias_to_fp16 = const()[name = string("blocks_25_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1286108416)))]; fp16 var_5461_to_fp16 = const()[name = string("op_5461_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_5473_cast_fp16 = layer_norm(axes = var_5473_axes_0, beta = blocks_25_attn_ln_bias_to_fp16, epsilon = var_5461_to_fp16, gamma = blocks_25_attn_ln_weight_to_fp16, x = x_453_cast_fp16)[name = string("op_5473_cast_fp16")]; tensor var_5484_to_fp16 = const()[name = string("op_5484_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1286111040)))]; tensor var_5485_to_fp16 = const()[name = string("op_5485_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1289387904)))]; tensor linear_200_cast_fp16 = linear(bias = var_5485_to_fp16, weight = var_5484_to_fp16, x = var_5473_cast_fp16)[name = string("linear_200_cast_fp16")]; tensor var_5488_to_fp16 = const()[name = string("op_5488_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1289390528)))]; tensor linear_201_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_5488_to_fp16, x = var_5473_cast_fp16)[name = string("linear_201_cast_fp16")]; tensor var_5492_to_fp16 = const()[name = string("op_5492_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1292667392)))]; tensor var_5493_to_fp16 = const()[name = string("op_5493_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1295944256)))]; tensor linear_202_cast_fp16 = linear(bias = var_5493_to_fp16, weight = var_5492_to_fp16, x = var_5473_cast_fp16)[name = string("linear_202_cast_fp16")]; tensor var_5495_shape_cast_fp16 = shape(x = linear_200_cast_fp16)[name = string("op_5495_shape_cast_fp16")]; int32 gather_302_axis_0 = const()[name = string("gather_302_axis_0"), val = int32(0)]; int32 gather_302_batch_dims_0 = const()[name = string("gather_302_batch_dims_0"), val = int32(0)]; bool gather_302_validate_indices_0 = const()[name = string("gather_302_validate_indices_0"), val = bool(false)]; string var_5495_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5495_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_302_to_uint16 = const()[name = string("select_302_to_uint16"), val = uint16(1)]; tensor var_5495_shape_cast_fp16_to_uint16 = cast(dtype = var_5495_shape_cast_fp16_to_uint16_dtype_0, x = var_5495_shape_cast_fp16)[name = string("cast_340")]; uint16 gather_302_cast_uint16 = gather(axis = gather_302_axis_0, batch_dims = gather_302_batch_dims_0, indices = select_302_to_uint16, validate_indices = gather_302_validate_indices_0, x = var_5495_shape_cast_fp16_to_uint16)[name = string("gather_302_cast_uint16")]; string gather_302_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_302_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_302_cast_uint16_to_int32 = cast(dtype = gather_302_cast_uint16_to_int32_dtype_0, x = gather_302_cast_uint16)[name = string("cast_339")]; int32 end_step_53 = add(x = offset, y = gather_302_cast_uint16_to_int32)[name = string("end_step_53")]; tensor expand_dims_400 = const()[name = string("expand_dims_400"), val = tensor([0])]; tensor expand_dims_402 = const()[name = string("expand_dims_402"), val = tensor([0])]; tensor expand_dims_403_axes_0 = const()[name = string("expand_dims_403_axes_0"), val = tensor([0])]; tensor expand_dims_403 = expand_dims(axes = expand_dims_403_axes_0, x = end_step_53)[name = string("expand_dims_403")]; tensor concat_554_values0_0 = const()[name = string("concat_554_values0_0"), val = tensor([25])]; int32 concat_554_axis_0 = const()[name = string("concat_554_axis_0"), val = int32(0)]; bool concat_554_interleave_0 = const()[name = string("concat_554_interleave_0"), val = bool(false)]; tensor concat_554 = concat(axis = concat_554_axis_0, interleave = concat_554_interleave_0, values = (concat_554_values0_0, expand_dims_400, expand_dims_1, expand_dims_402))[name = string("concat_554")]; tensor concat_555_values0_0 = const()[name = string("concat_555_values0_0"), val = tensor([0])]; tensor concat_555_values1_0 = const()[name = string("concat_555_values1_0"), val = tensor([0])]; tensor concat_555_values3_0 = const()[name = string("concat_555_values3_0"), val = tensor([0])]; int32 concat_555_axis_0 = const()[name = string("concat_555_axis_0"), val = int32(0)]; bool concat_555_interleave_0 = const()[name = string("concat_555_interleave_0"), val = bool(false)]; tensor concat_555 = concat(axis = concat_555_axis_0, interleave = concat_555_interleave_0, values = (concat_555_values0_0, concat_555_values1_0, expand_dims_403, concat_555_values3_0))[name = string("concat_555")]; tensor k_cache1_internal_tensor_assign_26_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_26_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_26_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_26_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_26_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_26_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_26_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_26_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_26_cast_fp16 = slice_update(begin = concat_554, begin_mask = k_cache1_internal_tensor_assign_26_begin_mask_0, end = concat_555, end_mask = k_cache1_internal_tensor_assign_26_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_26_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_26_stride_0, update = linear_201_cast_fp16, x = coreml_update_state_112)[name = string("k_cache1_internal_tensor_assign_26_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_26_cast_fp16, input = k_cache1)[name = string("coreml_update_state_114_write_state")]; tensor coreml_update_state_114 = read_state(input = k_cache1)[name = string("coreml_update_state_114")]; tensor v_cache1_internal_tensor_assign_26_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_26_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_26_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_26_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_26_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_26_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_26_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_26_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_26_cast_fp16 = slice_update(begin = concat_554, begin_mask = v_cache1_internal_tensor_assign_26_begin_mask_0, end = concat_555, end_mask = v_cache1_internal_tensor_assign_26_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_26_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_26_stride_0, update = linear_202_cast_fp16, x = coreml_update_state_113)[name = string("v_cache1_internal_tensor_assign_26_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_26_cast_fp16, input = v_cache1)[name = string("coreml_update_state_115_write_state")]; tensor coreml_update_state_115 = read_state(input = v_cache1)[name = string("coreml_update_state_115")]; int32 concat_560_values0_0 = const()[name = string("concat_560_values0_0"), val = int32(1)]; int32 concat_560_values2_0 = const()[name = string("concat_560_values2_0"), val = int32(1280)]; int32 concat_560_axis_0 = const()[name = string("concat_560_axis_0"), val = int32(0)]; bool concat_560_interleave_0 = const()[name = string("concat_560_interleave_0"), val = bool(false)]; tensor concat_560 = concat(axis = concat_560_axis_0, interleave = concat_560_interleave_0, values = (concat_560_values0_0, end_step_53, concat_560_values2_0))[name = string("concat_560")]; tensor var_5511_begin_0 = const()[name = string("op_5511_begin_0"), val = tensor([0, 0, 0])]; tensor var_5511_end_mask_0 = const()[name = string("op_5511_end_mask_0"), val = tensor([true, false, true])]; tensor var_5511_cast_fp16 = slice_by_index(begin = var_5511_begin_0, end = concat_560, end_mask = var_5511_end_mask_0, x = k_cache_101_cast_fp16)[name = string("op_5511_cast_fp16")]; tensor var_5514_begin_0 = const()[name = string("op_5514_begin_0"), val = tensor([0, 0, 0])]; tensor var_5514_end_mask_0 = const()[name = string("op_5514_end_mask_0"), val = tensor([true, false, true])]; tensor var_5514_cast_fp16 = slice_by_index(begin = var_5514_begin_0, end = concat_560, end_mask = var_5514_end_mask_0, x = v_cache_101_cast_fp16)[name = string("op_5514_cast_fp16")]; tensor concat_562x = const()[name = string("concat_562x"), val = tensor([1, -1, 20, 64])]; tensor var_5524_cast_fp16 = reshape(shape = concat_562x, x = linear_200_cast_fp16)[name = string("op_5524_cast_fp16")]; tensor const_260_to_fp16 = const()[name = string("const_260_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_203_cast_fp16 = mul(x = var_5524_cast_fp16, y = const_260_to_fp16)[name = string("q_203_cast_fp16")]; tensor concat_563x = const()[name = string("concat_563x"), val = tensor([1, -1, 20, 64])]; tensor var_5531_cast_fp16 = reshape(shape = concat_563x, x = var_5511_cast_fp16)[name = string("op_5531_cast_fp16")]; tensor const_261_to_fp16 = const()[name = string("const_261_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_255_cast_fp16 = mul(x = var_5531_cast_fp16, y = const_261_to_fp16)[name = string("k_255_cast_fp16")]; tensor concat_564x = const()[name = string("concat_564x"), val = tensor([1, -1, 20, 64])]; tensor var_5538_cast_fp16 = reshape(shape = concat_564x, x = var_5514_cast_fp16)[name = string("op_5538_cast_fp16")]; tensor var_5539 = const()[name = string("op_5539"), val = tensor([0, 2, 1, 3])]; bool qk_151_transpose_x_0 = const()[name = string("qk_151_transpose_x_0"), val = bool(false)]; bool qk_151_transpose_y_0 = const()[name = string("qk_151_transpose_y_0"), val = bool(false)]; tensor transpose_357_perm_0 = const()[name = string("transpose_357_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_358_perm_0 = const()[name = string("transpose_358_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_358 = transpose(perm = transpose_358_perm_0, x = k_255_cast_fp16)[name = string("transpose_438")]; tensor transpose_357 = transpose(perm = transpose_357_perm_0, x = q_203_cast_fp16)[name = string("transpose_439")]; tensor qk_151_cast_fp16 = matmul(transpose_x = qk_151_transpose_x_0, transpose_y = qk_151_transpose_y_0, x = transpose_357, y = transpose_358)[name = string("qk_151_cast_fp16")]; int32 concat_565_values1_0 = const()[name = string("concat_565_values1_0"), val = int32(448)]; int32 concat_565_axis_0 = const()[name = string("concat_565_axis_0"), val = int32(0)]; bool concat_565_interleave_0 = const()[name = string("concat_565_interleave_0"), val = bool(false)]; tensor concat_565 = concat(axis = concat_565_axis_0, interleave = concat_565_interleave_0, values = (gather_302_cast_uint16_to_int32, concat_565_values1_0))[name = string("concat_565")]; tensor var_5542_begin_0 = const()[name = string("op_5542_begin_0"), val = tensor([0, 0])]; tensor var_5542_end_mask_0 = const()[name = string("op_5542_end_mask_0"), val = tensor([false, true])]; tensor var_5542_cast_fp16 = slice_by_index(begin = var_5542_begin_0, end = concat_565, end_mask = var_5542_end_mask_0, x = mask_to_fp16)[name = string("op_5542_cast_fp16")]; int32 concat_566_values0_0 = const()[name = string("concat_566_values0_0"), val = int32(0)]; int32 concat_566_axis_0 = const()[name = string("concat_566_axis_0"), val = int32(0)]; bool concat_566_interleave_0 = const()[name = string("concat_566_interleave_0"), val = bool(false)]; tensor concat_566 = concat(axis = concat_566_axis_0, interleave = concat_566_interleave_0, values = (concat_566_values0_0, gather_302_cast_uint16_to_int32))[name = string("concat_566")]; tensor var_5543_begin_0 = const()[name = string("op_5543_begin_0"), val = tensor([0, 0])]; tensor var_5543_end_mask_0 = const()[name = string("op_5543_end_mask_0"), val = tensor([true, false])]; tensor var_5543_cast_fp16 = slice_by_index(begin = var_5543_begin_0, end = concat_566, end_mask = var_5543_end_mask_0, x = var_5542_cast_fp16)[name = string("op_5543_cast_fp16")]; tensor qk_153_cast_fp16 = add(x = qk_151_cast_fp16, y = var_5543_cast_fp16)[name = string("qk_153_cast_fp16")]; tensor var_5546_cast_fp16 = softmax(axis = var_5455, x = qk_153_cast_fp16)[name = string("op_5546_cast_fp16")]; bool var_5548_transpose_x_0 = const()[name = string("op_5548_transpose_x_0"), val = bool(false)]; bool var_5548_transpose_y_0 = const()[name = string("op_5548_transpose_y_0"), val = bool(false)]; tensor v_255_cast_fp16 = transpose(perm = var_5539, x = var_5538_cast_fp16)[name = string("transpose_440")]; tensor var_5548_cast_fp16 = matmul(transpose_x = var_5548_transpose_x_0, transpose_y = var_5548_transpose_y_0, x = var_5546_cast_fp16, y = v_255_cast_fp16)[name = string("op_5548_cast_fp16")]; tensor var_5549 = const()[name = string("op_5549"), val = tensor([0, 2, 1, 3])]; tensor concat_567x = const()[name = string("concat_567x"), val = tensor([1, -1, 1280])]; tensor var_5550_cast_fp16 = transpose(perm = var_5549, x = var_5548_cast_fp16)[name = string("transpose_437")]; tensor x_457_cast_fp16 = reshape(shape = concat_567x, x = var_5550_cast_fp16)[name = string("x_457_cast_fp16")]; tensor var_5554_to_fp16 = const()[name = string("op_5554_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1295946880)))]; tensor var_5555_to_fp16 = const()[name = string("op_5555_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1299223744)))]; tensor linear_203_cast_fp16 = linear(bias = var_5555_to_fp16, weight = var_5554_to_fp16, x = x_457_cast_fp16)[name = string("linear_203_cast_fp16")]; tensor x_459_cast_fp16 = add(x = x_453_cast_fp16, y = linear_203_cast_fp16)[name = string("x_459_cast_fp16")]; tensor var_5562_axes_0 = const()[name = string("op_5562_axes_0"), val = tensor([-1])]; tensor blocks_25_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_25_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1299226368)))]; tensor blocks_25_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_25_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1299228992)))]; tensor var_5562_cast_fp16 = layer_norm(axes = var_5562_axes_0, beta = blocks_25_cross_attn_ln_bias_to_fp16, epsilon = var_5461_to_fp16, gamma = blocks_25_cross_attn_ln_weight_to_fp16, x = x_459_cast_fp16)[name = string("op_5562_cast_fp16")]; tensor var_5571_to_fp16 = const()[name = string("op_5571_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1299231616)))]; tensor var_5572_to_fp16 = const()[name = string("op_5572_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1302508480)))]; tensor linear_204_cast_fp16 = linear(bias = var_5572_to_fp16, weight = var_5571_to_fp16, x = var_5562_cast_fp16)[name = string("linear_204_cast_fp16")]; tensor concat_568 = const()[name = string("concat_568"), val = tensor([0, 0, 0])]; tensor concat_569 = const()[name = string("concat_569"), val = tensor([0, 1500, 0])]; tensor k_257_internal_tensor_assign_1_stride_0 = const()[name = string("k_257_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_257_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_257_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_257_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_257_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_257_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_257_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_257_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_568, begin_mask = k_257_internal_tensor_assign_1_begin_mask_0, end = concat_569, end_mask = k_257_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_257_internal_tensor_assign_1_squeeze_mask_0, stride = k_257_internal_tensor_assign_1_stride_0, update = k_cache_103_cast_fp16, x = k_7_to_fp16)[name = string("k_257_internal_tensor_assign_1_cast_fp16")]; tensor concat_570 = const()[name = string("concat_570"), val = tensor([0, 0, 0])]; tensor concat_571 = const()[name = string("concat_571"), val = tensor([0, 1500, 0])]; tensor v_257_internal_tensor_assign_1_stride_0 = const()[name = string("v_257_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_257_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_257_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_257_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_257_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_257_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_257_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_257_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_570, begin_mask = v_257_internal_tensor_assign_1_begin_mask_0, end = concat_571, end_mask = v_257_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_257_internal_tensor_assign_1_squeeze_mask_0, stride = v_257_internal_tensor_assign_1_stride_0, update = v_cache_103_cast_fp16, x = k_7_to_fp16)[name = string("v_257_internal_tensor_assign_1_cast_fp16")]; tensor concat_572x = const()[name = string("concat_572x"), val = tensor([1, -1, 20, 64])]; tensor var_5592_cast_fp16 = reshape(shape = concat_572x, x = linear_204_cast_fp16)[name = string("op_5592_cast_fp16")]; tensor const_262_to_fp16 = const()[name = string("const_262_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_207_cast_fp16 = mul(x = var_5592_cast_fp16, y = const_262_to_fp16)[name = string("q_207_cast_fp16")]; tensor var_5598 = const()[name = string("op_5598"), val = tensor([1, 1500, 20, -1])]; tensor var_5599_cast_fp16 = reshape(shape = var_5598, x = k_257_internal_tensor_assign_1_cast_fp16)[name = string("op_5599_cast_fp16")]; tensor const_263_to_fp16 = const()[name = string("const_263_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_259_cast_fp16 = mul(x = var_5599_cast_fp16, y = const_263_to_fp16)[name = string("k_259_cast_fp16")]; tensor var_5605 = const()[name = string("op_5605"), val = tensor([1, 1500, 20, -1])]; tensor var_5606_cast_fp16 = reshape(shape = var_5605, x = v_257_internal_tensor_assign_1_cast_fp16)[name = string("op_5606_cast_fp16")]; tensor var_5607 = const()[name = string("op_5607"), val = tensor([0, 2, 1, 3])]; bool qk_155_transpose_x_0 = const()[name = string("qk_155_transpose_x_0"), val = bool(false)]; bool qk_155_transpose_y_0 = const()[name = string("qk_155_transpose_y_0"), val = bool(false)]; tensor transpose_359_perm_0 = const()[name = string("transpose_359_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_360_perm_0 = const()[name = string("transpose_360_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_360 = transpose(perm = transpose_360_perm_0, x = k_259_cast_fp16)[name = string("transpose_434")]; tensor transpose_359 = transpose(perm = transpose_359_perm_0, x = q_207_cast_fp16)[name = string("transpose_435")]; tensor qk_155_cast_fp16 = matmul(transpose_x = qk_155_transpose_x_0, transpose_y = qk_155_transpose_y_0, x = transpose_359, y = transpose_360)[name = string("qk_155_cast_fp16")]; tensor var_5611_cast_fp16 = softmax(axis = var_5455, x = qk_155_cast_fp16)[name = string("op_5611_cast_fp16")]; bool var_5613_transpose_x_0 = const()[name = string("op_5613_transpose_x_0"), val = bool(false)]; bool var_5613_transpose_y_0 = const()[name = string("op_5613_transpose_y_0"), val = bool(false)]; tensor v_259_cast_fp16 = transpose(perm = var_5607, x = var_5606_cast_fp16)[name = string("transpose_436")]; tensor var_5613_cast_fp16 = matmul(transpose_x = var_5613_transpose_x_0, transpose_y = var_5613_transpose_y_0, x = var_5611_cast_fp16, y = v_259_cast_fp16)[name = string("op_5613_cast_fp16")]; tensor var_5614 = const()[name = string("op_5614"), val = tensor([0, 2, 1, 3])]; tensor concat_573x = const()[name = string("concat_573x"), val = tensor([1, -1, 1280])]; tensor var_5615_cast_fp16 = transpose(perm = var_5614, x = var_5613_cast_fp16)[name = string("transpose_433")]; tensor x_463_cast_fp16 = reshape(shape = concat_573x, x = var_5615_cast_fp16)[name = string("x_463_cast_fp16")]; tensor var_5619_to_fp16 = const()[name = string("op_5619_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1302511104)))]; tensor var_5620_to_fp16 = const()[name = string("op_5620_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1305787968)))]; tensor linear_205_cast_fp16 = linear(bias = var_5620_to_fp16, weight = var_5619_to_fp16, x = x_463_cast_fp16)[name = string("linear_205_cast_fp16")]; tensor x_465_cast_fp16 = add(x = x_459_cast_fp16, y = linear_205_cast_fp16)[name = string("x_465_cast_fp16")]; tensor var_5627_axes_0 = const()[name = string("op_5627_axes_0"), val = tensor([-1])]; tensor blocks_25_mlp_ln_weight_to_fp16 = const()[name = string("blocks_25_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1305790592)))]; tensor blocks_25_mlp_ln_bias_to_fp16 = const()[name = string("blocks_25_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1305793216)))]; tensor var_5627_cast_fp16 = layer_norm(axes = var_5627_axes_0, beta = blocks_25_mlp_ln_bias_to_fp16, epsilon = var_5461_to_fp16, gamma = blocks_25_mlp_ln_weight_to_fp16, x = x_465_cast_fp16)[name = string("op_5627_cast_fp16")]; tensor var_5636_to_fp16 = const()[name = string("op_5636_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1305795840)))]; tensor var_5637_to_fp16 = const()[name = string("op_5637_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1318903104)))]; tensor linear_206_cast_fp16 = linear(bias = var_5637_to_fp16, weight = var_5636_to_fp16, x = var_5627_cast_fp16)[name = string("linear_206_cast_fp16")]; string x_469_mode_0 = const()[name = string("x_469_mode_0"), val = string("EXACT")]; tensor x_469_cast_fp16 = gelu(mode = x_469_mode_0, x = linear_206_cast_fp16)[name = string("x_469_cast_fp16")]; tensor var_5642_to_fp16 = const()[name = string("op_5642_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1318913408)))]; tensor var_5643_to_fp16 = const()[name = string("op_5643_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1332020672)))]; tensor linear_207_cast_fp16 = linear(bias = var_5643_to_fp16, weight = var_5642_to_fp16, x = x_469_cast_fp16)[name = string("linear_207_cast_fp16")]; tensor x_471_cast_fp16 = add(x = x_465_cast_fp16, y = linear_207_cast_fp16)[name = string("x_471_cast_fp16")]; tensor k_cache_105_begin_0 = const()[name = string("k_cache_105_begin_0"), val = tensor([26, 0, 0, 0])]; tensor k_cache_105_end_0 = const()[name = string("k_cache_105_end_0"), val = tensor([27, 1, 448, 1280])]; tensor k_cache_105_end_mask_0 = const()[name = string("k_cache_105_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_105_squeeze_mask_0 = const()[name = string("k_cache_105_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_105_cast_fp16 = slice_by_index(begin = k_cache_105_begin_0, end = k_cache_105_end_0, end_mask = k_cache_105_end_mask_0, squeeze_mask = k_cache_105_squeeze_mask_0, x = coreml_update_state_114)[name = string("k_cache_105_cast_fp16")]; tensor v_cache_105_begin_0 = const()[name = string("v_cache_105_begin_0"), val = tensor([26, 0, 0, 0])]; tensor v_cache_105_end_0 = const()[name = string("v_cache_105_end_0"), val = tensor([27, 1, 448, 1280])]; tensor v_cache_105_end_mask_0 = const()[name = string("v_cache_105_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_105_squeeze_mask_0 = const()[name = string("v_cache_105_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_105_cast_fp16 = slice_by_index(begin = v_cache_105_begin_0, end = v_cache_105_end_0, end_mask = v_cache_105_end_mask_0, squeeze_mask = v_cache_105_squeeze_mask_0, x = coreml_update_state_115)[name = string("v_cache_105_cast_fp16")]; tensor k_cache_107_begin_0 = const()[name = string("k_cache_107_begin_0"), val = tensor([26, 0, 0, 0])]; tensor k_cache_107_end_0 = const()[name = string("k_cache_107_end_0"), val = tensor([27, 1, 1500, 1280])]; tensor k_cache_107_end_mask_0 = const()[name = string("k_cache_107_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_107_squeeze_mask_0 = const()[name = string("k_cache_107_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_107_cast_fp16 = slice_by_index(begin = k_cache_107_begin_0, end = k_cache_107_end_0, end_mask = k_cache_107_end_mask_0, squeeze_mask = k_cache_107_squeeze_mask_0, x = read_state_2)[name = string("k_cache_107_cast_fp16")]; tensor v_cache_107_begin_0 = const()[name = string("v_cache_107_begin_0"), val = tensor([26, 0, 0, 0])]; tensor v_cache_107_end_0 = const()[name = string("v_cache_107_end_0"), val = tensor([27, 1, 1500, 1280])]; tensor v_cache_107_end_mask_0 = const()[name = string("v_cache_107_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_107_squeeze_mask_0 = const()[name = string("v_cache_107_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_107_cast_fp16 = slice_by_index(begin = v_cache_107_begin_0, end = v_cache_107_end_0, end_mask = v_cache_107_end_mask_0, squeeze_mask = v_cache_107_squeeze_mask_0, x = read_state_3)[name = string("v_cache_107_cast_fp16")]; int32 var_5666 = const()[name = string("op_5666"), val = int32(-1)]; tensor var_5684_axes_0 = const()[name = string("op_5684_axes_0"), val = tensor([-1])]; tensor blocks_26_attn_ln_weight_to_fp16 = const()[name = string("blocks_26_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1332023296)))]; tensor blocks_26_attn_ln_bias_to_fp16 = const()[name = string("blocks_26_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1332025920)))]; fp16 var_5672_to_fp16 = const()[name = string("op_5672_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_5684_cast_fp16 = layer_norm(axes = var_5684_axes_0, beta = blocks_26_attn_ln_bias_to_fp16, epsilon = var_5672_to_fp16, gamma = blocks_26_attn_ln_weight_to_fp16, x = x_471_cast_fp16)[name = string("op_5684_cast_fp16")]; tensor var_5695_to_fp16 = const()[name = string("op_5695_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1332028544)))]; tensor var_5696_to_fp16 = const()[name = string("op_5696_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1335305408)))]; tensor linear_208_cast_fp16 = linear(bias = var_5696_to_fp16, weight = var_5695_to_fp16, x = var_5684_cast_fp16)[name = string("linear_208_cast_fp16")]; tensor var_5699_to_fp16 = const()[name = string("op_5699_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1335308032)))]; tensor linear_209_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_5699_to_fp16, x = var_5684_cast_fp16)[name = string("linear_209_cast_fp16")]; tensor var_5703_to_fp16 = const()[name = string("op_5703_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1338584896)))]; tensor var_5704_to_fp16 = const()[name = string("op_5704_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1341861760)))]; tensor linear_210_cast_fp16 = linear(bias = var_5704_to_fp16, weight = var_5703_to_fp16, x = var_5684_cast_fp16)[name = string("linear_210_cast_fp16")]; tensor var_5706_shape_cast_fp16 = shape(x = linear_208_cast_fp16)[name = string("op_5706_shape_cast_fp16")]; int32 gather_314_axis_0 = const()[name = string("gather_314_axis_0"), val = int32(0)]; int32 gather_314_batch_dims_0 = const()[name = string("gather_314_batch_dims_0"), val = int32(0)]; bool gather_314_validate_indices_0 = const()[name = string("gather_314_validate_indices_0"), val = bool(false)]; string var_5706_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5706_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_314_to_uint16 = const()[name = string("select_314_to_uint16"), val = uint16(1)]; tensor var_5706_shape_cast_fp16_to_uint16 = cast(dtype = var_5706_shape_cast_fp16_to_uint16_dtype_0, x = var_5706_shape_cast_fp16)[name = string("cast_338")]; uint16 gather_314_cast_uint16 = gather(axis = gather_314_axis_0, batch_dims = gather_314_batch_dims_0, indices = select_314_to_uint16, validate_indices = gather_314_validate_indices_0, x = var_5706_shape_cast_fp16_to_uint16)[name = string("gather_314_cast_uint16")]; string gather_314_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_314_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_314_cast_uint16_to_int32 = cast(dtype = gather_314_cast_uint16_to_int32_dtype_0, x = gather_314_cast_uint16)[name = string("cast_337")]; int32 end_step_55 = add(x = offset, y = gather_314_cast_uint16_to_int32)[name = string("end_step_55")]; tensor expand_dims_416 = const()[name = string("expand_dims_416"), val = tensor([0])]; tensor expand_dims_418 = const()[name = string("expand_dims_418"), val = tensor([0])]; tensor expand_dims_419_axes_0 = const()[name = string("expand_dims_419_axes_0"), val = tensor([0])]; tensor expand_dims_419 = expand_dims(axes = expand_dims_419_axes_0, x = end_step_55)[name = string("expand_dims_419")]; tensor concat_576_values0_0 = const()[name = string("concat_576_values0_0"), val = tensor([26])]; int32 concat_576_axis_0 = const()[name = string("concat_576_axis_0"), val = int32(0)]; bool concat_576_interleave_0 = const()[name = string("concat_576_interleave_0"), val = bool(false)]; tensor concat_576 = concat(axis = concat_576_axis_0, interleave = concat_576_interleave_0, values = (concat_576_values0_0, expand_dims_416, expand_dims_1, expand_dims_418))[name = string("concat_576")]; tensor concat_577_values0_0 = const()[name = string("concat_577_values0_0"), val = tensor([0])]; tensor concat_577_values1_0 = const()[name = string("concat_577_values1_0"), val = tensor([0])]; tensor concat_577_values3_0 = const()[name = string("concat_577_values3_0"), val = tensor([0])]; int32 concat_577_axis_0 = const()[name = string("concat_577_axis_0"), val = int32(0)]; bool concat_577_interleave_0 = const()[name = string("concat_577_interleave_0"), val = bool(false)]; tensor concat_577 = concat(axis = concat_577_axis_0, interleave = concat_577_interleave_0, values = (concat_577_values0_0, concat_577_values1_0, expand_dims_419, concat_577_values3_0))[name = string("concat_577")]; tensor k_cache1_internal_tensor_assign_27_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_27_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_27_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_27_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_27_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_27_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_27_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_27_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_27_cast_fp16 = slice_update(begin = concat_576, begin_mask = k_cache1_internal_tensor_assign_27_begin_mask_0, end = concat_577, end_mask = k_cache1_internal_tensor_assign_27_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_27_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_27_stride_0, update = linear_209_cast_fp16, x = coreml_update_state_114)[name = string("k_cache1_internal_tensor_assign_27_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_27_cast_fp16, input = k_cache1)[name = string("coreml_update_state_116_write_state")]; tensor coreml_update_state_116 = read_state(input = k_cache1)[name = string("coreml_update_state_116")]; tensor v_cache1_internal_tensor_assign_27_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_27_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_27_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_27_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_27_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_27_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_27_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_27_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_27_cast_fp16 = slice_update(begin = concat_576, begin_mask = v_cache1_internal_tensor_assign_27_begin_mask_0, end = concat_577, end_mask = v_cache1_internal_tensor_assign_27_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_27_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_27_stride_0, update = linear_210_cast_fp16, x = coreml_update_state_115)[name = string("v_cache1_internal_tensor_assign_27_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_27_cast_fp16, input = v_cache1)[name = string("coreml_update_state_117_write_state")]; tensor coreml_update_state_117 = read_state(input = v_cache1)[name = string("coreml_update_state_117")]; int32 concat_582_values0_0 = const()[name = string("concat_582_values0_0"), val = int32(1)]; int32 concat_582_values2_0 = const()[name = string("concat_582_values2_0"), val = int32(1280)]; int32 concat_582_axis_0 = const()[name = string("concat_582_axis_0"), val = int32(0)]; bool concat_582_interleave_0 = const()[name = string("concat_582_interleave_0"), val = bool(false)]; tensor concat_582 = concat(axis = concat_582_axis_0, interleave = concat_582_interleave_0, values = (concat_582_values0_0, end_step_55, concat_582_values2_0))[name = string("concat_582")]; tensor var_5722_begin_0 = const()[name = string("op_5722_begin_0"), val = tensor([0, 0, 0])]; tensor var_5722_end_mask_0 = const()[name = string("op_5722_end_mask_0"), val = tensor([true, false, true])]; tensor var_5722_cast_fp16 = slice_by_index(begin = var_5722_begin_0, end = concat_582, end_mask = var_5722_end_mask_0, x = k_cache_105_cast_fp16)[name = string("op_5722_cast_fp16")]; tensor var_5725_begin_0 = const()[name = string("op_5725_begin_0"), val = tensor([0, 0, 0])]; tensor var_5725_end_mask_0 = const()[name = string("op_5725_end_mask_0"), val = tensor([true, false, true])]; tensor var_5725_cast_fp16 = slice_by_index(begin = var_5725_begin_0, end = concat_582, end_mask = var_5725_end_mask_0, x = v_cache_105_cast_fp16)[name = string("op_5725_cast_fp16")]; tensor concat_584x = const()[name = string("concat_584x"), val = tensor([1, -1, 20, 64])]; tensor var_5735_cast_fp16 = reshape(shape = concat_584x, x = linear_208_cast_fp16)[name = string("op_5735_cast_fp16")]; tensor const_264_to_fp16 = const()[name = string("const_264_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_211_cast_fp16 = mul(x = var_5735_cast_fp16, y = const_264_to_fp16)[name = string("q_211_cast_fp16")]; tensor concat_585x = const()[name = string("concat_585x"), val = tensor([1, -1, 20, 64])]; tensor var_5742_cast_fp16 = reshape(shape = concat_585x, x = var_5722_cast_fp16)[name = string("op_5742_cast_fp16")]; tensor const_265_to_fp16 = const()[name = string("const_265_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_265_cast_fp16 = mul(x = var_5742_cast_fp16, y = const_265_to_fp16)[name = string("k_265_cast_fp16")]; tensor concat_586x = const()[name = string("concat_586x"), val = tensor([1, -1, 20, 64])]; tensor var_5749_cast_fp16 = reshape(shape = concat_586x, x = var_5725_cast_fp16)[name = string("op_5749_cast_fp16")]; tensor var_5750 = const()[name = string("op_5750"), val = tensor([0, 2, 1, 3])]; bool qk_157_transpose_x_0 = const()[name = string("qk_157_transpose_x_0"), val = bool(false)]; bool qk_157_transpose_y_0 = const()[name = string("qk_157_transpose_y_0"), val = bool(false)]; tensor transpose_361_perm_0 = const()[name = string("transpose_361_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_362_perm_0 = const()[name = string("transpose_362_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_362 = transpose(perm = transpose_362_perm_0, x = k_265_cast_fp16)[name = string("transpose_430")]; tensor transpose_361 = transpose(perm = transpose_361_perm_0, x = q_211_cast_fp16)[name = string("transpose_431")]; tensor qk_157_cast_fp16 = matmul(transpose_x = qk_157_transpose_x_0, transpose_y = qk_157_transpose_y_0, x = transpose_361, y = transpose_362)[name = string("qk_157_cast_fp16")]; int32 concat_587_values1_0 = const()[name = string("concat_587_values1_0"), val = int32(448)]; int32 concat_587_axis_0 = const()[name = string("concat_587_axis_0"), val = int32(0)]; bool concat_587_interleave_0 = const()[name = string("concat_587_interleave_0"), val = bool(false)]; tensor concat_587 = concat(axis = concat_587_axis_0, interleave = concat_587_interleave_0, values = (gather_314_cast_uint16_to_int32, concat_587_values1_0))[name = string("concat_587")]; tensor var_5753_begin_0 = const()[name = string("op_5753_begin_0"), val = tensor([0, 0])]; tensor var_5753_end_mask_0 = const()[name = string("op_5753_end_mask_0"), val = tensor([false, true])]; tensor var_5753_cast_fp16 = slice_by_index(begin = var_5753_begin_0, end = concat_587, end_mask = var_5753_end_mask_0, x = mask_to_fp16)[name = string("op_5753_cast_fp16")]; int32 concat_588_values0_0 = const()[name = string("concat_588_values0_0"), val = int32(0)]; int32 concat_588_axis_0 = const()[name = string("concat_588_axis_0"), val = int32(0)]; bool concat_588_interleave_0 = const()[name = string("concat_588_interleave_0"), val = bool(false)]; tensor concat_588 = concat(axis = concat_588_axis_0, interleave = concat_588_interleave_0, values = (concat_588_values0_0, gather_314_cast_uint16_to_int32))[name = string("concat_588")]; tensor var_5754_begin_0 = const()[name = string("op_5754_begin_0"), val = tensor([0, 0])]; tensor var_5754_end_mask_0 = const()[name = string("op_5754_end_mask_0"), val = tensor([true, false])]; tensor var_5754_cast_fp16 = slice_by_index(begin = var_5754_begin_0, end = concat_588, end_mask = var_5754_end_mask_0, x = var_5753_cast_fp16)[name = string("op_5754_cast_fp16")]; tensor qk_159_cast_fp16 = add(x = qk_157_cast_fp16, y = var_5754_cast_fp16)[name = string("qk_159_cast_fp16")]; tensor var_5757_cast_fp16 = softmax(axis = var_5666, x = qk_159_cast_fp16)[name = string("op_5757_cast_fp16")]; bool var_5759_transpose_x_0 = const()[name = string("op_5759_transpose_x_0"), val = bool(false)]; bool var_5759_transpose_y_0 = const()[name = string("op_5759_transpose_y_0"), val = bool(false)]; tensor v_265_cast_fp16 = transpose(perm = var_5750, x = var_5749_cast_fp16)[name = string("transpose_432")]; tensor var_5759_cast_fp16 = matmul(transpose_x = var_5759_transpose_x_0, transpose_y = var_5759_transpose_y_0, x = var_5757_cast_fp16, y = v_265_cast_fp16)[name = string("op_5759_cast_fp16")]; tensor var_5760 = const()[name = string("op_5760"), val = tensor([0, 2, 1, 3])]; tensor concat_589x = const()[name = string("concat_589x"), val = tensor([1, -1, 1280])]; tensor var_5761_cast_fp16 = transpose(perm = var_5760, x = var_5759_cast_fp16)[name = string("transpose_429")]; tensor x_475_cast_fp16 = reshape(shape = concat_589x, x = var_5761_cast_fp16)[name = string("x_475_cast_fp16")]; tensor var_5765_to_fp16 = const()[name = string("op_5765_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1341864384)))]; tensor var_5766_to_fp16 = const()[name = string("op_5766_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1345141248)))]; tensor linear_211_cast_fp16 = linear(bias = var_5766_to_fp16, weight = var_5765_to_fp16, x = x_475_cast_fp16)[name = string("linear_211_cast_fp16")]; tensor x_477_cast_fp16 = add(x = x_471_cast_fp16, y = linear_211_cast_fp16)[name = string("x_477_cast_fp16")]; tensor var_5773_axes_0 = const()[name = string("op_5773_axes_0"), val = tensor([-1])]; tensor blocks_26_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_26_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1345143872)))]; tensor blocks_26_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_26_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1345146496)))]; tensor var_5773_cast_fp16 = layer_norm(axes = var_5773_axes_0, beta = blocks_26_cross_attn_ln_bias_to_fp16, epsilon = var_5672_to_fp16, gamma = blocks_26_cross_attn_ln_weight_to_fp16, x = x_477_cast_fp16)[name = string("op_5773_cast_fp16")]; tensor var_5782_to_fp16 = const()[name = string("op_5782_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1345149120)))]; tensor var_5783_to_fp16 = const()[name = string("op_5783_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1348425984)))]; tensor linear_212_cast_fp16 = linear(bias = var_5783_to_fp16, weight = var_5782_to_fp16, x = var_5773_cast_fp16)[name = string("linear_212_cast_fp16")]; tensor concat_590 = const()[name = string("concat_590"), val = tensor([0, 0, 0])]; tensor concat_591 = const()[name = string("concat_591"), val = tensor([0, 1500, 0])]; tensor k_267_internal_tensor_assign_1_stride_0 = const()[name = string("k_267_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_267_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_267_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_267_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_267_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_267_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_267_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_267_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_590, begin_mask = k_267_internal_tensor_assign_1_begin_mask_0, end = concat_591, end_mask = k_267_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_267_internal_tensor_assign_1_squeeze_mask_0, stride = k_267_internal_tensor_assign_1_stride_0, update = k_cache_107_cast_fp16, x = k_7_to_fp16)[name = string("k_267_internal_tensor_assign_1_cast_fp16")]; tensor concat_592 = const()[name = string("concat_592"), val = tensor([0, 0, 0])]; tensor concat_593 = const()[name = string("concat_593"), val = tensor([0, 1500, 0])]; tensor v_267_internal_tensor_assign_1_stride_0 = const()[name = string("v_267_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_267_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_267_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_267_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_267_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_267_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_267_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_267_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_592, begin_mask = v_267_internal_tensor_assign_1_begin_mask_0, end = concat_593, end_mask = v_267_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_267_internal_tensor_assign_1_squeeze_mask_0, stride = v_267_internal_tensor_assign_1_stride_0, update = v_cache_107_cast_fp16, x = k_7_to_fp16)[name = string("v_267_internal_tensor_assign_1_cast_fp16")]; tensor concat_594x = const()[name = string("concat_594x"), val = tensor([1, -1, 20, 64])]; tensor var_5803_cast_fp16 = reshape(shape = concat_594x, x = linear_212_cast_fp16)[name = string("op_5803_cast_fp16")]; tensor const_266_to_fp16 = const()[name = string("const_266_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_215_cast_fp16 = mul(x = var_5803_cast_fp16, y = const_266_to_fp16)[name = string("q_215_cast_fp16")]; tensor var_5809 = const()[name = string("op_5809"), val = tensor([1, 1500, 20, -1])]; tensor var_5810_cast_fp16 = reshape(shape = var_5809, x = k_267_internal_tensor_assign_1_cast_fp16)[name = string("op_5810_cast_fp16")]; tensor const_267_to_fp16 = const()[name = string("const_267_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_269_cast_fp16 = mul(x = var_5810_cast_fp16, y = const_267_to_fp16)[name = string("k_269_cast_fp16")]; tensor var_5816 = const()[name = string("op_5816"), val = tensor([1, 1500, 20, -1])]; tensor var_5817_cast_fp16 = reshape(shape = var_5816, x = v_267_internal_tensor_assign_1_cast_fp16)[name = string("op_5817_cast_fp16")]; tensor var_5818 = const()[name = string("op_5818"), val = tensor([0, 2, 1, 3])]; bool qk_161_transpose_x_0 = const()[name = string("qk_161_transpose_x_0"), val = bool(false)]; bool qk_161_transpose_y_0 = const()[name = string("qk_161_transpose_y_0"), val = bool(false)]; tensor transpose_363_perm_0 = const()[name = string("transpose_363_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_364_perm_0 = const()[name = string("transpose_364_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_364 = transpose(perm = transpose_364_perm_0, x = k_269_cast_fp16)[name = string("transpose_426")]; tensor transpose_363 = transpose(perm = transpose_363_perm_0, x = q_215_cast_fp16)[name = string("transpose_427")]; tensor qk_161_cast_fp16 = matmul(transpose_x = qk_161_transpose_x_0, transpose_y = qk_161_transpose_y_0, x = transpose_363, y = transpose_364)[name = string("qk_161_cast_fp16")]; tensor var_5822_cast_fp16 = softmax(axis = var_5666, x = qk_161_cast_fp16)[name = string("op_5822_cast_fp16")]; bool var_5824_transpose_x_0 = const()[name = string("op_5824_transpose_x_0"), val = bool(false)]; bool var_5824_transpose_y_0 = const()[name = string("op_5824_transpose_y_0"), val = bool(false)]; tensor v_269_cast_fp16 = transpose(perm = var_5818, x = var_5817_cast_fp16)[name = string("transpose_428")]; tensor var_5824_cast_fp16 = matmul(transpose_x = var_5824_transpose_x_0, transpose_y = var_5824_transpose_y_0, x = var_5822_cast_fp16, y = v_269_cast_fp16)[name = string("op_5824_cast_fp16")]; tensor var_5825 = const()[name = string("op_5825"), val = tensor([0, 2, 1, 3])]; tensor concat_595x = const()[name = string("concat_595x"), val = tensor([1, -1, 1280])]; tensor var_5826_cast_fp16 = transpose(perm = var_5825, x = var_5824_cast_fp16)[name = string("transpose_425")]; tensor x_481_cast_fp16 = reshape(shape = concat_595x, x = var_5826_cast_fp16)[name = string("x_481_cast_fp16")]; tensor var_5830_to_fp16 = const()[name = string("op_5830_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1348428608)))]; tensor var_5831_to_fp16 = const()[name = string("op_5831_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1351705472)))]; tensor linear_213_cast_fp16 = linear(bias = var_5831_to_fp16, weight = var_5830_to_fp16, x = x_481_cast_fp16)[name = string("linear_213_cast_fp16")]; tensor x_483_cast_fp16 = add(x = x_477_cast_fp16, y = linear_213_cast_fp16)[name = string("x_483_cast_fp16")]; tensor var_5838_axes_0 = const()[name = string("op_5838_axes_0"), val = tensor([-1])]; tensor blocks_26_mlp_ln_weight_to_fp16 = const()[name = string("blocks_26_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1351708096)))]; tensor blocks_26_mlp_ln_bias_to_fp16 = const()[name = string("blocks_26_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1351710720)))]; tensor var_5838_cast_fp16 = layer_norm(axes = var_5838_axes_0, beta = blocks_26_mlp_ln_bias_to_fp16, epsilon = var_5672_to_fp16, gamma = blocks_26_mlp_ln_weight_to_fp16, x = x_483_cast_fp16)[name = string("op_5838_cast_fp16")]; tensor var_5847_to_fp16 = const()[name = string("op_5847_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1351713344)))]; tensor var_5848_to_fp16 = const()[name = string("op_5848_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1364820608)))]; tensor linear_214_cast_fp16 = linear(bias = var_5848_to_fp16, weight = var_5847_to_fp16, x = var_5838_cast_fp16)[name = string("linear_214_cast_fp16")]; string x_487_mode_0 = const()[name = string("x_487_mode_0"), val = string("EXACT")]; tensor x_487_cast_fp16 = gelu(mode = x_487_mode_0, x = linear_214_cast_fp16)[name = string("x_487_cast_fp16")]; tensor var_5853_to_fp16 = const()[name = string("op_5853_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1364830912)))]; tensor var_5854_to_fp16 = const()[name = string("op_5854_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1377938176)))]; tensor linear_215_cast_fp16 = linear(bias = var_5854_to_fp16, weight = var_5853_to_fp16, x = x_487_cast_fp16)[name = string("linear_215_cast_fp16")]; tensor x_489_cast_fp16 = add(x = x_483_cast_fp16, y = linear_215_cast_fp16)[name = string("x_489_cast_fp16")]; tensor k_cache_109_begin_0 = const()[name = string("k_cache_109_begin_0"), val = tensor([27, 0, 0, 0])]; tensor k_cache_109_end_0 = const()[name = string("k_cache_109_end_0"), val = tensor([28, 1, 448, 1280])]; tensor k_cache_109_end_mask_0 = const()[name = string("k_cache_109_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_109_squeeze_mask_0 = const()[name = string("k_cache_109_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_109_cast_fp16 = slice_by_index(begin = k_cache_109_begin_0, end = k_cache_109_end_0, end_mask = k_cache_109_end_mask_0, squeeze_mask = k_cache_109_squeeze_mask_0, x = coreml_update_state_116)[name = string("k_cache_109_cast_fp16")]; tensor v_cache_109_begin_0 = const()[name = string("v_cache_109_begin_0"), val = tensor([27, 0, 0, 0])]; tensor v_cache_109_end_0 = const()[name = string("v_cache_109_end_0"), val = tensor([28, 1, 448, 1280])]; tensor v_cache_109_end_mask_0 = const()[name = string("v_cache_109_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_109_squeeze_mask_0 = const()[name = string("v_cache_109_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_109_cast_fp16 = slice_by_index(begin = v_cache_109_begin_0, end = v_cache_109_end_0, end_mask = v_cache_109_end_mask_0, squeeze_mask = v_cache_109_squeeze_mask_0, x = coreml_update_state_117)[name = string("v_cache_109_cast_fp16")]; tensor k_cache_111_begin_0 = const()[name = string("k_cache_111_begin_0"), val = tensor([27, 0, 0, 0])]; tensor k_cache_111_end_0 = const()[name = string("k_cache_111_end_0"), val = tensor([28, 1, 1500, 1280])]; tensor k_cache_111_end_mask_0 = const()[name = string("k_cache_111_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_111_squeeze_mask_0 = const()[name = string("k_cache_111_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_111_cast_fp16 = slice_by_index(begin = k_cache_111_begin_0, end = k_cache_111_end_0, end_mask = k_cache_111_end_mask_0, squeeze_mask = k_cache_111_squeeze_mask_0, x = read_state_2)[name = string("k_cache_111_cast_fp16")]; tensor v_cache_111_begin_0 = const()[name = string("v_cache_111_begin_0"), val = tensor([27, 0, 0, 0])]; tensor v_cache_111_end_0 = const()[name = string("v_cache_111_end_0"), val = tensor([28, 1, 1500, 1280])]; tensor v_cache_111_end_mask_0 = const()[name = string("v_cache_111_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_111_squeeze_mask_0 = const()[name = string("v_cache_111_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_111_cast_fp16 = slice_by_index(begin = v_cache_111_begin_0, end = v_cache_111_end_0, end_mask = v_cache_111_end_mask_0, squeeze_mask = v_cache_111_squeeze_mask_0, x = read_state_3)[name = string("v_cache_111_cast_fp16")]; int32 var_5877 = const()[name = string("op_5877"), val = int32(-1)]; tensor var_5895_axes_0 = const()[name = string("op_5895_axes_0"), val = tensor([-1])]; tensor blocks_27_attn_ln_weight_to_fp16 = const()[name = string("blocks_27_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1377940800)))]; tensor blocks_27_attn_ln_bias_to_fp16 = const()[name = string("blocks_27_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1377943424)))]; fp16 var_5883_to_fp16 = const()[name = string("op_5883_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_5895_cast_fp16 = layer_norm(axes = var_5895_axes_0, beta = blocks_27_attn_ln_bias_to_fp16, epsilon = var_5883_to_fp16, gamma = blocks_27_attn_ln_weight_to_fp16, x = x_489_cast_fp16)[name = string("op_5895_cast_fp16")]; tensor var_5906_to_fp16 = const()[name = string("op_5906_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1377946048)))]; tensor var_5907_to_fp16 = const()[name = string("op_5907_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1381222912)))]; tensor linear_216_cast_fp16 = linear(bias = var_5907_to_fp16, weight = var_5906_to_fp16, x = var_5895_cast_fp16)[name = string("linear_216_cast_fp16")]; tensor var_5910_to_fp16 = const()[name = string("op_5910_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1381225536)))]; tensor linear_217_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_5910_to_fp16, x = var_5895_cast_fp16)[name = string("linear_217_cast_fp16")]; tensor var_5914_to_fp16 = const()[name = string("op_5914_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1384502400)))]; tensor var_5915_to_fp16 = const()[name = string("op_5915_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1387779264)))]; tensor linear_218_cast_fp16 = linear(bias = var_5915_to_fp16, weight = var_5914_to_fp16, x = var_5895_cast_fp16)[name = string("linear_218_cast_fp16")]; tensor var_5917_shape_cast_fp16 = shape(x = linear_216_cast_fp16)[name = string("op_5917_shape_cast_fp16")]; int32 gather_326_axis_0 = const()[name = string("gather_326_axis_0"), val = int32(0)]; int32 gather_326_batch_dims_0 = const()[name = string("gather_326_batch_dims_0"), val = int32(0)]; bool gather_326_validate_indices_0 = const()[name = string("gather_326_validate_indices_0"), val = bool(false)]; string var_5917_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5917_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_326_to_uint16 = const()[name = string("select_326_to_uint16"), val = uint16(1)]; tensor var_5917_shape_cast_fp16_to_uint16 = cast(dtype = var_5917_shape_cast_fp16_to_uint16_dtype_0, x = var_5917_shape_cast_fp16)[name = string("cast_336")]; uint16 gather_326_cast_uint16 = gather(axis = gather_326_axis_0, batch_dims = gather_326_batch_dims_0, indices = select_326_to_uint16, validate_indices = gather_326_validate_indices_0, x = var_5917_shape_cast_fp16_to_uint16)[name = string("gather_326_cast_uint16")]; string gather_326_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_326_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_326_cast_uint16_to_int32 = cast(dtype = gather_326_cast_uint16_to_int32_dtype_0, x = gather_326_cast_uint16)[name = string("cast_335")]; int32 end_step_57 = add(x = offset, y = gather_326_cast_uint16_to_int32)[name = string("end_step_57")]; tensor expand_dims_432 = const()[name = string("expand_dims_432"), val = tensor([0])]; tensor expand_dims_434 = const()[name = string("expand_dims_434"), val = tensor([0])]; tensor expand_dims_435_axes_0 = const()[name = string("expand_dims_435_axes_0"), val = tensor([0])]; tensor expand_dims_435 = expand_dims(axes = expand_dims_435_axes_0, x = end_step_57)[name = string("expand_dims_435")]; tensor concat_598_values0_0 = const()[name = string("concat_598_values0_0"), val = tensor([27])]; int32 concat_598_axis_0 = const()[name = string("concat_598_axis_0"), val = int32(0)]; bool concat_598_interleave_0 = const()[name = string("concat_598_interleave_0"), val = bool(false)]; tensor concat_598 = concat(axis = concat_598_axis_0, interleave = concat_598_interleave_0, values = (concat_598_values0_0, expand_dims_432, expand_dims_1, expand_dims_434))[name = string("concat_598")]; tensor concat_599_values0_0 = const()[name = string("concat_599_values0_0"), val = tensor([0])]; tensor concat_599_values1_0 = const()[name = string("concat_599_values1_0"), val = tensor([0])]; tensor concat_599_values3_0 = const()[name = string("concat_599_values3_0"), val = tensor([0])]; int32 concat_599_axis_0 = const()[name = string("concat_599_axis_0"), val = int32(0)]; bool concat_599_interleave_0 = const()[name = string("concat_599_interleave_0"), val = bool(false)]; tensor concat_599 = concat(axis = concat_599_axis_0, interleave = concat_599_interleave_0, values = (concat_599_values0_0, concat_599_values1_0, expand_dims_435, concat_599_values3_0))[name = string("concat_599")]; tensor k_cache1_internal_tensor_assign_28_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_28_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_28_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_28_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_28_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_28_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_28_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_28_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_28_cast_fp16 = slice_update(begin = concat_598, begin_mask = k_cache1_internal_tensor_assign_28_begin_mask_0, end = concat_599, end_mask = k_cache1_internal_tensor_assign_28_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_28_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_28_stride_0, update = linear_217_cast_fp16, x = coreml_update_state_116)[name = string("k_cache1_internal_tensor_assign_28_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_28_cast_fp16, input = k_cache1)[name = string("coreml_update_state_118_write_state")]; tensor coreml_update_state_118 = read_state(input = k_cache1)[name = string("coreml_update_state_118")]; tensor v_cache1_internal_tensor_assign_28_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_28_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_28_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_28_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_28_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_28_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_28_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_28_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_28_cast_fp16 = slice_update(begin = concat_598, begin_mask = v_cache1_internal_tensor_assign_28_begin_mask_0, end = concat_599, end_mask = v_cache1_internal_tensor_assign_28_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_28_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_28_stride_0, update = linear_218_cast_fp16, x = coreml_update_state_117)[name = string("v_cache1_internal_tensor_assign_28_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_28_cast_fp16, input = v_cache1)[name = string("coreml_update_state_119_write_state")]; tensor coreml_update_state_119 = read_state(input = v_cache1)[name = string("coreml_update_state_119")]; int32 concat_604_values0_0 = const()[name = string("concat_604_values0_0"), val = int32(1)]; int32 concat_604_values2_0 = const()[name = string("concat_604_values2_0"), val = int32(1280)]; int32 concat_604_axis_0 = const()[name = string("concat_604_axis_0"), val = int32(0)]; bool concat_604_interleave_0 = const()[name = string("concat_604_interleave_0"), val = bool(false)]; tensor concat_604 = concat(axis = concat_604_axis_0, interleave = concat_604_interleave_0, values = (concat_604_values0_0, end_step_57, concat_604_values2_0))[name = string("concat_604")]; tensor var_5933_begin_0 = const()[name = string("op_5933_begin_0"), val = tensor([0, 0, 0])]; tensor var_5933_end_mask_0 = const()[name = string("op_5933_end_mask_0"), val = tensor([true, false, true])]; tensor var_5933_cast_fp16 = slice_by_index(begin = var_5933_begin_0, end = concat_604, end_mask = var_5933_end_mask_0, x = k_cache_109_cast_fp16)[name = string("op_5933_cast_fp16")]; tensor var_5936_begin_0 = const()[name = string("op_5936_begin_0"), val = tensor([0, 0, 0])]; tensor var_5936_end_mask_0 = const()[name = string("op_5936_end_mask_0"), val = tensor([true, false, true])]; tensor var_5936_cast_fp16 = slice_by_index(begin = var_5936_begin_0, end = concat_604, end_mask = var_5936_end_mask_0, x = v_cache_109_cast_fp16)[name = string("op_5936_cast_fp16")]; tensor concat_606x = const()[name = string("concat_606x"), val = tensor([1, -1, 20, 64])]; tensor var_5946_cast_fp16 = reshape(shape = concat_606x, x = linear_216_cast_fp16)[name = string("op_5946_cast_fp16")]; tensor const_268_to_fp16 = const()[name = string("const_268_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_219_cast_fp16 = mul(x = var_5946_cast_fp16, y = const_268_to_fp16)[name = string("q_219_cast_fp16")]; tensor concat_607x = const()[name = string("concat_607x"), val = tensor([1, -1, 20, 64])]; tensor var_5953_cast_fp16 = reshape(shape = concat_607x, x = var_5933_cast_fp16)[name = string("op_5953_cast_fp16")]; tensor const_269_to_fp16 = const()[name = string("const_269_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_275_cast_fp16 = mul(x = var_5953_cast_fp16, y = const_269_to_fp16)[name = string("k_275_cast_fp16")]; tensor concat_608x = const()[name = string("concat_608x"), val = tensor([1, -1, 20, 64])]; tensor var_5960_cast_fp16 = reshape(shape = concat_608x, x = var_5936_cast_fp16)[name = string("op_5960_cast_fp16")]; tensor var_5961 = const()[name = string("op_5961"), val = tensor([0, 2, 1, 3])]; bool qk_163_transpose_x_0 = const()[name = string("qk_163_transpose_x_0"), val = bool(false)]; bool qk_163_transpose_y_0 = const()[name = string("qk_163_transpose_y_0"), val = bool(false)]; tensor transpose_365_perm_0 = const()[name = string("transpose_365_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_366_perm_0 = const()[name = string("transpose_366_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_366 = transpose(perm = transpose_366_perm_0, x = k_275_cast_fp16)[name = string("transpose_422")]; tensor transpose_365 = transpose(perm = transpose_365_perm_0, x = q_219_cast_fp16)[name = string("transpose_423")]; tensor qk_163_cast_fp16 = matmul(transpose_x = qk_163_transpose_x_0, transpose_y = qk_163_transpose_y_0, x = transpose_365, y = transpose_366)[name = string("qk_163_cast_fp16")]; int32 concat_609_values1_0 = const()[name = string("concat_609_values1_0"), val = int32(448)]; int32 concat_609_axis_0 = const()[name = string("concat_609_axis_0"), val = int32(0)]; bool concat_609_interleave_0 = const()[name = string("concat_609_interleave_0"), val = bool(false)]; tensor concat_609 = concat(axis = concat_609_axis_0, interleave = concat_609_interleave_0, values = (gather_326_cast_uint16_to_int32, concat_609_values1_0))[name = string("concat_609")]; tensor var_5964_begin_0 = const()[name = string("op_5964_begin_0"), val = tensor([0, 0])]; tensor var_5964_end_mask_0 = const()[name = string("op_5964_end_mask_0"), val = tensor([false, true])]; tensor var_5964_cast_fp16 = slice_by_index(begin = var_5964_begin_0, end = concat_609, end_mask = var_5964_end_mask_0, x = mask_to_fp16)[name = string("op_5964_cast_fp16")]; int32 concat_610_values0_0 = const()[name = string("concat_610_values0_0"), val = int32(0)]; int32 concat_610_axis_0 = const()[name = string("concat_610_axis_0"), val = int32(0)]; bool concat_610_interleave_0 = const()[name = string("concat_610_interleave_0"), val = bool(false)]; tensor concat_610 = concat(axis = concat_610_axis_0, interleave = concat_610_interleave_0, values = (concat_610_values0_0, gather_326_cast_uint16_to_int32))[name = string("concat_610")]; tensor var_5965_begin_0 = const()[name = string("op_5965_begin_0"), val = tensor([0, 0])]; tensor var_5965_end_mask_0 = const()[name = string("op_5965_end_mask_0"), val = tensor([true, false])]; tensor var_5965_cast_fp16 = slice_by_index(begin = var_5965_begin_0, end = concat_610, end_mask = var_5965_end_mask_0, x = var_5964_cast_fp16)[name = string("op_5965_cast_fp16")]; tensor qk_165_cast_fp16 = add(x = qk_163_cast_fp16, y = var_5965_cast_fp16)[name = string("qk_165_cast_fp16")]; tensor var_5968_cast_fp16 = softmax(axis = var_5877, x = qk_165_cast_fp16)[name = string("op_5968_cast_fp16")]; bool var_5970_transpose_x_0 = const()[name = string("op_5970_transpose_x_0"), val = bool(false)]; bool var_5970_transpose_y_0 = const()[name = string("op_5970_transpose_y_0"), val = bool(false)]; tensor v_275_cast_fp16 = transpose(perm = var_5961, x = var_5960_cast_fp16)[name = string("transpose_424")]; tensor var_5970_cast_fp16 = matmul(transpose_x = var_5970_transpose_x_0, transpose_y = var_5970_transpose_y_0, x = var_5968_cast_fp16, y = v_275_cast_fp16)[name = string("op_5970_cast_fp16")]; tensor var_5971 = const()[name = string("op_5971"), val = tensor([0, 2, 1, 3])]; tensor concat_611x = const()[name = string("concat_611x"), val = tensor([1, -1, 1280])]; tensor var_5972_cast_fp16 = transpose(perm = var_5971, x = var_5970_cast_fp16)[name = string("transpose_421")]; tensor x_493_cast_fp16 = reshape(shape = concat_611x, x = var_5972_cast_fp16)[name = string("x_493_cast_fp16")]; tensor var_5976_to_fp16 = const()[name = string("op_5976_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1387781888)))]; tensor var_5977_to_fp16 = const()[name = string("op_5977_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1391058752)))]; tensor linear_219_cast_fp16 = linear(bias = var_5977_to_fp16, weight = var_5976_to_fp16, x = x_493_cast_fp16)[name = string("linear_219_cast_fp16")]; tensor x_495_cast_fp16 = add(x = x_489_cast_fp16, y = linear_219_cast_fp16)[name = string("x_495_cast_fp16")]; tensor var_5984_axes_0 = const()[name = string("op_5984_axes_0"), val = tensor([-1])]; tensor blocks_27_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_27_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1391061376)))]; tensor blocks_27_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_27_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1391064000)))]; tensor var_5984_cast_fp16 = layer_norm(axes = var_5984_axes_0, beta = blocks_27_cross_attn_ln_bias_to_fp16, epsilon = var_5883_to_fp16, gamma = blocks_27_cross_attn_ln_weight_to_fp16, x = x_495_cast_fp16)[name = string("op_5984_cast_fp16")]; tensor var_5993_to_fp16 = const()[name = string("op_5993_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1391066624)))]; tensor var_5994_to_fp16 = const()[name = string("op_5994_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1394343488)))]; tensor linear_220_cast_fp16 = linear(bias = var_5994_to_fp16, weight = var_5993_to_fp16, x = var_5984_cast_fp16)[name = string("linear_220_cast_fp16")]; tensor concat_612 = const()[name = string("concat_612"), val = tensor([0, 0, 0])]; tensor concat_613 = const()[name = string("concat_613"), val = tensor([0, 1500, 0])]; tensor k_277_internal_tensor_assign_1_stride_0 = const()[name = string("k_277_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_277_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_277_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_277_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_277_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_277_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_277_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_277_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_612, begin_mask = k_277_internal_tensor_assign_1_begin_mask_0, end = concat_613, end_mask = k_277_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_277_internal_tensor_assign_1_squeeze_mask_0, stride = k_277_internal_tensor_assign_1_stride_0, update = k_cache_111_cast_fp16, x = k_7_to_fp16)[name = string("k_277_internal_tensor_assign_1_cast_fp16")]; tensor concat_614 = const()[name = string("concat_614"), val = tensor([0, 0, 0])]; tensor concat_615 = const()[name = string("concat_615"), val = tensor([0, 1500, 0])]; tensor v_277_internal_tensor_assign_1_stride_0 = const()[name = string("v_277_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_277_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_277_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_277_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_277_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_277_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_277_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_277_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_614, begin_mask = v_277_internal_tensor_assign_1_begin_mask_0, end = concat_615, end_mask = v_277_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_277_internal_tensor_assign_1_squeeze_mask_0, stride = v_277_internal_tensor_assign_1_stride_0, update = v_cache_111_cast_fp16, x = k_7_to_fp16)[name = string("v_277_internal_tensor_assign_1_cast_fp16")]; tensor concat_616x = const()[name = string("concat_616x"), val = tensor([1, -1, 20, 64])]; tensor var_6014_cast_fp16 = reshape(shape = concat_616x, x = linear_220_cast_fp16)[name = string("op_6014_cast_fp16")]; tensor const_270_to_fp16 = const()[name = string("const_270_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_223_cast_fp16 = mul(x = var_6014_cast_fp16, y = const_270_to_fp16)[name = string("q_223_cast_fp16")]; tensor var_6020 = const()[name = string("op_6020"), val = tensor([1, 1500, 20, -1])]; tensor var_6021_cast_fp16 = reshape(shape = var_6020, x = k_277_internal_tensor_assign_1_cast_fp16)[name = string("op_6021_cast_fp16")]; tensor const_271_to_fp16 = const()[name = string("const_271_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_279_cast_fp16 = mul(x = var_6021_cast_fp16, y = const_271_to_fp16)[name = string("k_279_cast_fp16")]; tensor var_6027 = const()[name = string("op_6027"), val = tensor([1, 1500, 20, -1])]; tensor var_6028_cast_fp16 = reshape(shape = var_6027, x = v_277_internal_tensor_assign_1_cast_fp16)[name = string("op_6028_cast_fp16")]; tensor var_6029 = const()[name = string("op_6029"), val = tensor([0, 2, 1, 3])]; bool qk_167_transpose_x_0 = const()[name = string("qk_167_transpose_x_0"), val = bool(false)]; bool qk_167_transpose_y_0 = const()[name = string("qk_167_transpose_y_0"), val = bool(false)]; tensor transpose_367_perm_0 = const()[name = string("transpose_367_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_368_perm_0 = const()[name = string("transpose_368_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_368 = transpose(perm = transpose_368_perm_0, x = k_279_cast_fp16)[name = string("transpose_418")]; tensor transpose_367 = transpose(perm = transpose_367_perm_0, x = q_223_cast_fp16)[name = string("transpose_419")]; tensor qk_167_cast_fp16 = matmul(transpose_x = qk_167_transpose_x_0, transpose_y = qk_167_transpose_y_0, x = transpose_367, y = transpose_368)[name = string("qk_167_cast_fp16")]; tensor var_6033_cast_fp16 = softmax(axis = var_5877, x = qk_167_cast_fp16)[name = string("op_6033_cast_fp16")]; bool var_6035_transpose_x_0 = const()[name = string("op_6035_transpose_x_0"), val = bool(false)]; bool var_6035_transpose_y_0 = const()[name = string("op_6035_transpose_y_0"), val = bool(false)]; tensor v_279_cast_fp16 = transpose(perm = var_6029, x = var_6028_cast_fp16)[name = string("transpose_420")]; tensor var_6035_cast_fp16 = matmul(transpose_x = var_6035_transpose_x_0, transpose_y = var_6035_transpose_y_0, x = var_6033_cast_fp16, y = v_279_cast_fp16)[name = string("op_6035_cast_fp16")]; tensor var_6036 = const()[name = string("op_6036"), val = tensor([0, 2, 1, 3])]; tensor concat_617x = const()[name = string("concat_617x"), val = tensor([1, -1, 1280])]; tensor var_6037_cast_fp16 = transpose(perm = var_6036, x = var_6035_cast_fp16)[name = string("transpose_417")]; tensor x_499_cast_fp16 = reshape(shape = concat_617x, x = var_6037_cast_fp16)[name = string("x_499_cast_fp16")]; tensor var_6041_to_fp16 = const()[name = string("op_6041_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1394346112)))]; tensor var_6042_to_fp16 = const()[name = string("op_6042_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1397622976)))]; tensor linear_221_cast_fp16 = linear(bias = var_6042_to_fp16, weight = var_6041_to_fp16, x = x_499_cast_fp16)[name = string("linear_221_cast_fp16")]; tensor x_501_cast_fp16 = add(x = x_495_cast_fp16, y = linear_221_cast_fp16)[name = string("x_501_cast_fp16")]; tensor var_6049_axes_0 = const()[name = string("op_6049_axes_0"), val = tensor([-1])]; tensor blocks_27_mlp_ln_weight_to_fp16 = const()[name = string("blocks_27_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1397625600)))]; tensor blocks_27_mlp_ln_bias_to_fp16 = const()[name = string("blocks_27_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1397628224)))]; tensor var_6049_cast_fp16 = layer_norm(axes = var_6049_axes_0, beta = blocks_27_mlp_ln_bias_to_fp16, epsilon = var_5883_to_fp16, gamma = blocks_27_mlp_ln_weight_to_fp16, x = x_501_cast_fp16)[name = string("op_6049_cast_fp16")]; tensor var_6058_to_fp16 = const()[name = string("op_6058_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1397630848)))]; tensor var_6059_to_fp16 = const()[name = string("op_6059_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1410738112)))]; tensor linear_222_cast_fp16 = linear(bias = var_6059_to_fp16, weight = var_6058_to_fp16, x = var_6049_cast_fp16)[name = string("linear_222_cast_fp16")]; string x_505_mode_0 = const()[name = string("x_505_mode_0"), val = string("EXACT")]; tensor x_505_cast_fp16 = gelu(mode = x_505_mode_0, x = linear_222_cast_fp16)[name = string("x_505_cast_fp16")]; tensor var_6064_to_fp16 = const()[name = string("op_6064_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1410748416)))]; tensor var_6065_to_fp16 = const()[name = string("op_6065_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1423855680)))]; tensor linear_223_cast_fp16 = linear(bias = var_6065_to_fp16, weight = var_6064_to_fp16, x = x_505_cast_fp16)[name = string("linear_223_cast_fp16")]; tensor x_507_cast_fp16 = add(x = x_501_cast_fp16, y = linear_223_cast_fp16)[name = string("x_507_cast_fp16")]; tensor k_cache_113_begin_0 = const()[name = string("k_cache_113_begin_0"), val = tensor([28, 0, 0, 0])]; tensor k_cache_113_end_0 = const()[name = string("k_cache_113_end_0"), val = tensor([29, 1, 448, 1280])]; tensor k_cache_113_end_mask_0 = const()[name = string("k_cache_113_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_113_squeeze_mask_0 = const()[name = string("k_cache_113_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_113_cast_fp16 = slice_by_index(begin = k_cache_113_begin_0, end = k_cache_113_end_0, end_mask = k_cache_113_end_mask_0, squeeze_mask = k_cache_113_squeeze_mask_0, x = coreml_update_state_118)[name = string("k_cache_113_cast_fp16")]; tensor v_cache_113_begin_0 = const()[name = string("v_cache_113_begin_0"), val = tensor([28, 0, 0, 0])]; tensor v_cache_113_end_0 = const()[name = string("v_cache_113_end_0"), val = tensor([29, 1, 448, 1280])]; tensor v_cache_113_end_mask_0 = const()[name = string("v_cache_113_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_113_squeeze_mask_0 = const()[name = string("v_cache_113_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_113_cast_fp16 = slice_by_index(begin = v_cache_113_begin_0, end = v_cache_113_end_0, end_mask = v_cache_113_end_mask_0, squeeze_mask = v_cache_113_squeeze_mask_0, x = coreml_update_state_119)[name = string("v_cache_113_cast_fp16")]; tensor k_cache_115_begin_0 = const()[name = string("k_cache_115_begin_0"), val = tensor([28, 0, 0, 0])]; tensor k_cache_115_end_0 = const()[name = string("k_cache_115_end_0"), val = tensor([29, 1, 1500, 1280])]; tensor k_cache_115_end_mask_0 = const()[name = string("k_cache_115_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_115_squeeze_mask_0 = const()[name = string("k_cache_115_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_115_cast_fp16 = slice_by_index(begin = k_cache_115_begin_0, end = k_cache_115_end_0, end_mask = k_cache_115_end_mask_0, squeeze_mask = k_cache_115_squeeze_mask_0, x = read_state_2)[name = string("k_cache_115_cast_fp16")]; tensor v_cache_115_begin_0 = const()[name = string("v_cache_115_begin_0"), val = tensor([28, 0, 0, 0])]; tensor v_cache_115_end_0 = const()[name = string("v_cache_115_end_0"), val = tensor([29, 1, 1500, 1280])]; tensor v_cache_115_end_mask_0 = const()[name = string("v_cache_115_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_115_squeeze_mask_0 = const()[name = string("v_cache_115_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_115_cast_fp16 = slice_by_index(begin = v_cache_115_begin_0, end = v_cache_115_end_0, end_mask = v_cache_115_end_mask_0, squeeze_mask = v_cache_115_squeeze_mask_0, x = read_state_3)[name = string("v_cache_115_cast_fp16")]; int32 var_6088 = const()[name = string("op_6088"), val = int32(-1)]; tensor var_6106_axes_0 = const()[name = string("op_6106_axes_0"), val = tensor([-1])]; tensor blocks_28_attn_ln_weight_to_fp16 = const()[name = string("blocks_28_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1423858304)))]; tensor blocks_28_attn_ln_bias_to_fp16 = const()[name = string("blocks_28_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1423860928)))]; fp16 var_6094_to_fp16 = const()[name = string("op_6094_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_6106_cast_fp16 = layer_norm(axes = var_6106_axes_0, beta = blocks_28_attn_ln_bias_to_fp16, epsilon = var_6094_to_fp16, gamma = blocks_28_attn_ln_weight_to_fp16, x = x_507_cast_fp16)[name = string("op_6106_cast_fp16")]; tensor var_6117_to_fp16 = const()[name = string("op_6117_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1423863552)))]; tensor var_6118_to_fp16 = const()[name = string("op_6118_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1427140416)))]; tensor linear_224_cast_fp16 = linear(bias = var_6118_to_fp16, weight = var_6117_to_fp16, x = var_6106_cast_fp16)[name = string("linear_224_cast_fp16")]; tensor var_6121_to_fp16 = const()[name = string("op_6121_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1427143040)))]; tensor linear_225_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_6121_to_fp16, x = var_6106_cast_fp16)[name = string("linear_225_cast_fp16")]; tensor var_6125_to_fp16 = const()[name = string("op_6125_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1430419904)))]; tensor var_6126_to_fp16 = const()[name = string("op_6126_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1433696768)))]; tensor linear_226_cast_fp16 = linear(bias = var_6126_to_fp16, weight = var_6125_to_fp16, x = var_6106_cast_fp16)[name = string("linear_226_cast_fp16")]; tensor var_6128_shape_cast_fp16 = shape(x = linear_224_cast_fp16)[name = string("op_6128_shape_cast_fp16")]; int32 gather_338_axis_0 = const()[name = string("gather_338_axis_0"), val = int32(0)]; int32 gather_338_batch_dims_0 = const()[name = string("gather_338_batch_dims_0"), val = int32(0)]; bool gather_338_validate_indices_0 = const()[name = string("gather_338_validate_indices_0"), val = bool(false)]; string var_6128_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_6128_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_338_to_uint16 = const()[name = string("select_338_to_uint16"), val = uint16(1)]; tensor var_6128_shape_cast_fp16_to_uint16 = cast(dtype = var_6128_shape_cast_fp16_to_uint16_dtype_0, x = var_6128_shape_cast_fp16)[name = string("cast_334")]; uint16 gather_338_cast_uint16 = gather(axis = gather_338_axis_0, batch_dims = gather_338_batch_dims_0, indices = select_338_to_uint16, validate_indices = gather_338_validate_indices_0, x = var_6128_shape_cast_fp16_to_uint16)[name = string("gather_338_cast_uint16")]; string gather_338_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_338_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_338_cast_uint16_to_int32 = cast(dtype = gather_338_cast_uint16_to_int32_dtype_0, x = gather_338_cast_uint16)[name = string("cast_333")]; int32 end_step_59 = add(x = offset, y = gather_338_cast_uint16_to_int32)[name = string("end_step_59")]; tensor expand_dims_448 = const()[name = string("expand_dims_448"), val = tensor([0])]; tensor expand_dims_450 = const()[name = string("expand_dims_450"), val = tensor([0])]; tensor expand_dims_451_axes_0 = const()[name = string("expand_dims_451_axes_0"), val = tensor([0])]; tensor expand_dims_451 = expand_dims(axes = expand_dims_451_axes_0, x = end_step_59)[name = string("expand_dims_451")]; tensor concat_620_values0_0 = const()[name = string("concat_620_values0_0"), val = tensor([28])]; int32 concat_620_axis_0 = const()[name = string("concat_620_axis_0"), val = int32(0)]; bool concat_620_interleave_0 = const()[name = string("concat_620_interleave_0"), val = bool(false)]; tensor concat_620 = concat(axis = concat_620_axis_0, interleave = concat_620_interleave_0, values = (concat_620_values0_0, expand_dims_448, expand_dims_1, expand_dims_450))[name = string("concat_620")]; tensor concat_621_values0_0 = const()[name = string("concat_621_values0_0"), val = tensor([0])]; tensor concat_621_values1_0 = const()[name = string("concat_621_values1_0"), val = tensor([0])]; tensor concat_621_values3_0 = const()[name = string("concat_621_values3_0"), val = tensor([0])]; int32 concat_621_axis_0 = const()[name = string("concat_621_axis_0"), val = int32(0)]; bool concat_621_interleave_0 = const()[name = string("concat_621_interleave_0"), val = bool(false)]; tensor concat_621 = concat(axis = concat_621_axis_0, interleave = concat_621_interleave_0, values = (concat_621_values0_0, concat_621_values1_0, expand_dims_451, concat_621_values3_0))[name = string("concat_621")]; tensor k_cache1_internal_tensor_assign_29_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_29_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_29_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_29_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_29_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_29_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_29_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_29_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_29_cast_fp16 = slice_update(begin = concat_620, begin_mask = k_cache1_internal_tensor_assign_29_begin_mask_0, end = concat_621, end_mask = k_cache1_internal_tensor_assign_29_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_29_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_29_stride_0, update = linear_225_cast_fp16, x = coreml_update_state_118)[name = string("k_cache1_internal_tensor_assign_29_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_29_cast_fp16, input = k_cache1)[name = string("coreml_update_state_120_write_state")]; tensor coreml_update_state_120 = read_state(input = k_cache1)[name = string("coreml_update_state_120")]; tensor v_cache1_internal_tensor_assign_29_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_29_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_29_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_29_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_29_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_29_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_29_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_29_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_29_cast_fp16 = slice_update(begin = concat_620, begin_mask = v_cache1_internal_tensor_assign_29_begin_mask_0, end = concat_621, end_mask = v_cache1_internal_tensor_assign_29_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_29_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_29_stride_0, update = linear_226_cast_fp16, x = coreml_update_state_119)[name = string("v_cache1_internal_tensor_assign_29_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_29_cast_fp16, input = v_cache1)[name = string("coreml_update_state_121_write_state")]; tensor coreml_update_state_121 = read_state(input = v_cache1)[name = string("coreml_update_state_121")]; int32 concat_626_values0_0 = const()[name = string("concat_626_values0_0"), val = int32(1)]; int32 concat_626_values2_0 = const()[name = string("concat_626_values2_0"), val = int32(1280)]; int32 concat_626_axis_0 = const()[name = string("concat_626_axis_0"), val = int32(0)]; bool concat_626_interleave_0 = const()[name = string("concat_626_interleave_0"), val = bool(false)]; tensor concat_626 = concat(axis = concat_626_axis_0, interleave = concat_626_interleave_0, values = (concat_626_values0_0, end_step_59, concat_626_values2_0))[name = string("concat_626")]; tensor var_6144_begin_0 = const()[name = string("op_6144_begin_0"), val = tensor([0, 0, 0])]; tensor var_6144_end_mask_0 = const()[name = string("op_6144_end_mask_0"), val = tensor([true, false, true])]; tensor var_6144_cast_fp16 = slice_by_index(begin = var_6144_begin_0, end = concat_626, end_mask = var_6144_end_mask_0, x = k_cache_113_cast_fp16)[name = string("op_6144_cast_fp16")]; tensor var_6147_begin_0 = const()[name = string("op_6147_begin_0"), val = tensor([0, 0, 0])]; tensor var_6147_end_mask_0 = const()[name = string("op_6147_end_mask_0"), val = tensor([true, false, true])]; tensor var_6147_cast_fp16 = slice_by_index(begin = var_6147_begin_0, end = concat_626, end_mask = var_6147_end_mask_0, x = v_cache_113_cast_fp16)[name = string("op_6147_cast_fp16")]; tensor concat_628x = const()[name = string("concat_628x"), val = tensor([1, -1, 20, 64])]; tensor var_6157_cast_fp16 = reshape(shape = concat_628x, x = linear_224_cast_fp16)[name = string("op_6157_cast_fp16")]; tensor const_272_to_fp16 = const()[name = string("const_272_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_227_cast_fp16 = mul(x = var_6157_cast_fp16, y = const_272_to_fp16)[name = string("q_227_cast_fp16")]; tensor concat_629x = const()[name = string("concat_629x"), val = tensor([1, -1, 20, 64])]; tensor var_6164_cast_fp16 = reshape(shape = concat_629x, x = var_6144_cast_fp16)[name = string("op_6164_cast_fp16")]; tensor const_273_to_fp16 = const()[name = string("const_273_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_285_cast_fp16 = mul(x = var_6164_cast_fp16, y = const_273_to_fp16)[name = string("k_285_cast_fp16")]; tensor concat_630x = const()[name = string("concat_630x"), val = tensor([1, -1, 20, 64])]; tensor var_6171_cast_fp16 = reshape(shape = concat_630x, x = var_6147_cast_fp16)[name = string("op_6171_cast_fp16")]; tensor var_6172 = const()[name = string("op_6172"), val = tensor([0, 2, 1, 3])]; bool qk_169_transpose_x_0 = const()[name = string("qk_169_transpose_x_0"), val = bool(false)]; bool qk_169_transpose_y_0 = const()[name = string("qk_169_transpose_y_0"), val = bool(false)]; tensor transpose_369_perm_0 = const()[name = string("transpose_369_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_370_perm_0 = const()[name = string("transpose_370_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_370 = transpose(perm = transpose_370_perm_0, x = k_285_cast_fp16)[name = string("transpose_414")]; tensor transpose_369 = transpose(perm = transpose_369_perm_0, x = q_227_cast_fp16)[name = string("transpose_415")]; tensor qk_169_cast_fp16 = matmul(transpose_x = qk_169_transpose_x_0, transpose_y = qk_169_transpose_y_0, x = transpose_369, y = transpose_370)[name = string("qk_169_cast_fp16")]; int32 concat_631_values1_0 = const()[name = string("concat_631_values1_0"), val = int32(448)]; int32 concat_631_axis_0 = const()[name = string("concat_631_axis_0"), val = int32(0)]; bool concat_631_interleave_0 = const()[name = string("concat_631_interleave_0"), val = bool(false)]; tensor concat_631 = concat(axis = concat_631_axis_0, interleave = concat_631_interleave_0, values = (gather_338_cast_uint16_to_int32, concat_631_values1_0))[name = string("concat_631")]; tensor var_6175_begin_0 = const()[name = string("op_6175_begin_0"), val = tensor([0, 0])]; tensor var_6175_end_mask_0 = const()[name = string("op_6175_end_mask_0"), val = tensor([false, true])]; tensor var_6175_cast_fp16 = slice_by_index(begin = var_6175_begin_0, end = concat_631, end_mask = var_6175_end_mask_0, x = mask_to_fp16)[name = string("op_6175_cast_fp16")]; int32 concat_632_values0_0 = const()[name = string("concat_632_values0_0"), val = int32(0)]; int32 concat_632_axis_0 = const()[name = string("concat_632_axis_0"), val = int32(0)]; bool concat_632_interleave_0 = const()[name = string("concat_632_interleave_0"), val = bool(false)]; tensor concat_632 = concat(axis = concat_632_axis_0, interleave = concat_632_interleave_0, values = (concat_632_values0_0, gather_338_cast_uint16_to_int32))[name = string("concat_632")]; tensor var_6176_begin_0 = const()[name = string("op_6176_begin_0"), val = tensor([0, 0])]; tensor var_6176_end_mask_0 = const()[name = string("op_6176_end_mask_0"), val = tensor([true, false])]; tensor var_6176_cast_fp16 = slice_by_index(begin = var_6176_begin_0, end = concat_632, end_mask = var_6176_end_mask_0, x = var_6175_cast_fp16)[name = string("op_6176_cast_fp16")]; tensor qk_171_cast_fp16 = add(x = qk_169_cast_fp16, y = var_6176_cast_fp16)[name = string("qk_171_cast_fp16")]; tensor var_6179_cast_fp16 = softmax(axis = var_6088, x = qk_171_cast_fp16)[name = string("op_6179_cast_fp16")]; bool var_6181_transpose_x_0 = const()[name = string("op_6181_transpose_x_0"), val = bool(false)]; bool var_6181_transpose_y_0 = const()[name = string("op_6181_transpose_y_0"), val = bool(false)]; tensor v_285_cast_fp16 = transpose(perm = var_6172, x = var_6171_cast_fp16)[name = string("transpose_416")]; tensor var_6181_cast_fp16 = matmul(transpose_x = var_6181_transpose_x_0, transpose_y = var_6181_transpose_y_0, x = var_6179_cast_fp16, y = v_285_cast_fp16)[name = string("op_6181_cast_fp16")]; tensor var_6182 = const()[name = string("op_6182"), val = tensor([0, 2, 1, 3])]; tensor concat_633x = const()[name = string("concat_633x"), val = tensor([1, -1, 1280])]; tensor var_6183_cast_fp16 = transpose(perm = var_6182, x = var_6181_cast_fp16)[name = string("transpose_413")]; tensor x_511_cast_fp16 = reshape(shape = concat_633x, x = var_6183_cast_fp16)[name = string("x_511_cast_fp16")]; tensor var_6187_to_fp16 = const()[name = string("op_6187_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1433699392)))]; tensor var_6188_to_fp16 = const()[name = string("op_6188_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1436976256)))]; tensor linear_227_cast_fp16 = linear(bias = var_6188_to_fp16, weight = var_6187_to_fp16, x = x_511_cast_fp16)[name = string("linear_227_cast_fp16")]; tensor x_513_cast_fp16 = add(x = x_507_cast_fp16, y = linear_227_cast_fp16)[name = string("x_513_cast_fp16")]; tensor var_6195_axes_0 = const()[name = string("op_6195_axes_0"), val = tensor([-1])]; tensor blocks_28_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_28_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1436978880)))]; tensor blocks_28_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_28_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1436981504)))]; tensor var_6195_cast_fp16 = layer_norm(axes = var_6195_axes_0, beta = blocks_28_cross_attn_ln_bias_to_fp16, epsilon = var_6094_to_fp16, gamma = blocks_28_cross_attn_ln_weight_to_fp16, x = x_513_cast_fp16)[name = string("op_6195_cast_fp16")]; tensor var_6204_to_fp16 = const()[name = string("op_6204_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1436984128)))]; tensor var_6205_to_fp16 = const()[name = string("op_6205_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1440260992)))]; tensor linear_228_cast_fp16 = linear(bias = var_6205_to_fp16, weight = var_6204_to_fp16, x = var_6195_cast_fp16)[name = string("linear_228_cast_fp16")]; tensor concat_634 = const()[name = string("concat_634"), val = tensor([0, 0, 0])]; tensor concat_635 = const()[name = string("concat_635"), val = tensor([0, 1500, 0])]; tensor k_287_internal_tensor_assign_1_stride_0 = const()[name = string("k_287_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_287_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_287_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_287_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_287_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_287_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_287_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_287_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_634, begin_mask = k_287_internal_tensor_assign_1_begin_mask_0, end = concat_635, end_mask = k_287_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_287_internal_tensor_assign_1_squeeze_mask_0, stride = k_287_internal_tensor_assign_1_stride_0, update = k_cache_115_cast_fp16, x = k_7_to_fp16)[name = string("k_287_internal_tensor_assign_1_cast_fp16")]; tensor concat_636 = const()[name = string("concat_636"), val = tensor([0, 0, 0])]; tensor concat_637 = const()[name = string("concat_637"), val = tensor([0, 1500, 0])]; tensor v_287_internal_tensor_assign_1_stride_0 = const()[name = string("v_287_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_287_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_287_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_287_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_287_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_287_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_287_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_287_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_636, begin_mask = v_287_internal_tensor_assign_1_begin_mask_0, end = concat_637, end_mask = v_287_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_287_internal_tensor_assign_1_squeeze_mask_0, stride = v_287_internal_tensor_assign_1_stride_0, update = v_cache_115_cast_fp16, x = k_7_to_fp16)[name = string("v_287_internal_tensor_assign_1_cast_fp16")]; tensor concat_638x = const()[name = string("concat_638x"), val = tensor([1, -1, 20, 64])]; tensor var_6225_cast_fp16 = reshape(shape = concat_638x, x = linear_228_cast_fp16)[name = string("op_6225_cast_fp16")]; tensor const_274_to_fp16 = const()[name = string("const_274_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_231_cast_fp16 = mul(x = var_6225_cast_fp16, y = const_274_to_fp16)[name = string("q_231_cast_fp16")]; tensor var_6231 = const()[name = string("op_6231"), val = tensor([1, 1500, 20, -1])]; tensor var_6232_cast_fp16 = reshape(shape = var_6231, x = k_287_internal_tensor_assign_1_cast_fp16)[name = string("op_6232_cast_fp16")]; tensor const_275_to_fp16 = const()[name = string("const_275_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_289_cast_fp16 = mul(x = var_6232_cast_fp16, y = const_275_to_fp16)[name = string("k_289_cast_fp16")]; tensor var_6238 = const()[name = string("op_6238"), val = tensor([1, 1500, 20, -1])]; tensor var_6239_cast_fp16 = reshape(shape = var_6238, x = v_287_internal_tensor_assign_1_cast_fp16)[name = string("op_6239_cast_fp16")]; tensor var_6240 = const()[name = string("op_6240"), val = tensor([0, 2, 1, 3])]; bool qk_173_transpose_x_0 = const()[name = string("qk_173_transpose_x_0"), val = bool(false)]; bool qk_173_transpose_y_0 = const()[name = string("qk_173_transpose_y_0"), val = bool(false)]; tensor transpose_371_perm_0 = const()[name = string("transpose_371_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_372_perm_0 = const()[name = string("transpose_372_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_372 = transpose(perm = transpose_372_perm_0, x = k_289_cast_fp16)[name = string("transpose_410")]; tensor transpose_371 = transpose(perm = transpose_371_perm_0, x = q_231_cast_fp16)[name = string("transpose_411")]; tensor qk_173_cast_fp16 = matmul(transpose_x = qk_173_transpose_x_0, transpose_y = qk_173_transpose_y_0, x = transpose_371, y = transpose_372)[name = string("qk_173_cast_fp16")]; tensor var_6244_cast_fp16 = softmax(axis = var_6088, x = qk_173_cast_fp16)[name = string("op_6244_cast_fp16")]; bool var_6246_transpose_x_0 = const()[name = string("op_6246_transpose_x_0"), val = bool(false)]; bool var_6246_transpose_y_0 = const()[name = string("op_6246_transpose_y_0"), val = bool(false)]; tensor v_289_cast_fp16 = transpose(perm = var_6240, x = var_6239_cast_fp16)[name = string("transpose_412")]; tensor var_6246_cast_fp16 = matmul(transpose_x = var_6246_transpose_x_0, transpose_y = var_6246_transpose_y_0, x = var_6244_cast_fp16, y = v_289_cast_fp16)[name = string("op_6246_cast_fp16")]; tensor var_6247 = const()[name = string("op_6247"), val = tensor([0, 2, 1, 3])]; tensor concat_639x = const()[name = string("concat_639x"), val = tensor([1, -1, 1280])]; tensor var_6248_cast_fp16 = transpose(perm = var_6247, x = var_6246_cast_fp16)[name = string("transpose_409")]; tensor x_517_cast_fp16 = reshape(shape = concat_639x, x = var_6248_cast_fp16)[name = string("x_517_cast_fp16")]; tensor var_6252_to_fp16 = const()[name = string("op_6252_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1440263616)))]; tensor var_6253_to_fp16 = const()[name = string("op_6253_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1443540480)))]; tensor linear_229_cast_fp16 = linear(bias = var_6253_to_fp16, weight = var_6252_to_fp16, x = x_517_cast_fp16)[name = string("linear_229_cast_fp16")]; tensor x_519_cast_fp16 = add(x = x_513_cast_fp16, y = linear_229_cast_fp16)[name = string("x_519_cast_fp16")]; tensor var_6260_axes_0 = const()[name = string("op_6260_axes_0"), val = tensor([-1])]; tensor blocks_28_mlp_ln_weight_to_fp16 = const()[name = string("blocks_28_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1443543104)))]; tensor blocks_28_mlp_ln_bias_to_fp16 = const()[name = string("blocks_28_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1443545728)))]; tensor var_6260_cast_fp16 = layer_norm(axes = var_6260_axes_0, beta = blocks_28_mlp_ln_bias_to_fp16, epsilon = var_6094_to_fp16, gamma = blocks_28_mlp_ln_weight_to_fp16, x = x_519_cast_fp16)[name = string("op_6260_cast_fp16")]; tensor var_6269_to_fp16 = const()[name = string("op_6269_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1443548352)))]; tensor var_6270_to_fp16 = const()[name = string("op_6270_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1456655616)))]; tensor linear_230_cast_fp16 = linear(bias = var_6270_to_fp16, weight = var_6269_to_fp16, x = var_6260_cast_fp16)[name = string("linear_230_cast_fp16")]; string x_523_mode_0 = const()[name = string("x_523_mode_0"), val = string("EXACT")]; tensor x_523_cast_fp16 = gelu(mode = x_523_mode_0, x = linear_230_cast_fp16)[name = string("x_523_cast_fp16")]; tensor var_6275_to_fp16 = const()[name = string("op_6275_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1456665920)))]; tensor var_6276_to_fp16 = const()[name = string("op_6276_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1469773184)))]; tensor linear_231_cast_fp16 = linear(bias = var_6276_to_fp16, weight = var_6275_to_fp16, x = x_523_cast_fp16)[name = string("linear_231_cast_fp16")]; tensor x_525_cast_fp16 = add(x = x_519_cast_fp16, y = linear_231_cast_fp16)[name = string("x_525_cast_fp16")]; tensor k_cache_117_begin_0 = const()[name = string("k_cache_117_begin_0"), val = tensor([29, 0, 0, 0])]; tensor k_cache_117_end_0 = const()[name = string("k_cache_117_end_0"), val = tensor([30, 1, 448, 1280])]; tensor k_cache_117_end_mask_0 = const()[name = string("k_cache_117_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_117_squeeze_mask_0 = const()[name = string("k_cache_117_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_117_cast_fp16 = slice_by_index(begin = k_cache_117_begin_0, end = k_cache_117_end_0, end_mask = k_cache_117_end_mask_0, squeeze_mask = k_cache_117_squeeze_mask_0, x = coreml_update_state_120)[name = string("k_cache_117_cast_fp16")]; tensor v_cache_117_begin_0 = const()[name = string("v_cache_117_begin_0"), val = tensor([29, 0, 0, 0])]; tensor v_cache_117_end_0 = const()[name = string("v_cache_117_end_0"), val = tensor([30, 1, 448, 1280])]; tensor v_cache_117_end_mask_0 = const()[name = string("v_cache_117_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_117_squeeze_mask_0 = const()[name = string("v_cache_117_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_117_cast_fp16 = slice_by_index(begin = v_cache_117_begin_0, end = v_cache_117_end_0, end_mask = v_cache_117_end_mask_0, squeeze_mask = v_cache_117_squeeze_mask_0, x = coreml_update_state_121)[name = string("v_cache_117_cast_fp16")]; tensor k_cache_119_begin_0 = const()[name = string("k_cache_119_begin_0"), val = tensor([29, 0, 0, 0])]; tensor k_cache_119_end_0 = const()[name = string("k_cache_119_end_0"), val = tensor([30, 1, 1500, 1280])]; tensor k_cache_119_end_mask_0 = const()[name = string("k_cache_119_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_119_squeeze_mask_0 = const()[name = string("k_cache_119_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_119_cast_fp16 = slice_by_index(begin = k_cache_119_begin_0, end = k_cache_119_end_0, end_mask = k_cache_119_end_mask_0, squeeze_mask = k_cache_119_squeeze_mask_0, x = read_state_2)[name = string("k_cache_119_cast_fp16")]; tensor v_cache_119_begin_0 = const()[name = string("v_cache_119_begin_0"), val = tensor([29, 0, 0, 0])]; tensor v_cache_119_end_0 = const()[name = string("v_cache_119_end_0"), val = tensor([30, 1, 1500, 1280])]; tensor v_cache_119_end_mask_0 = const()[name = string("v_cache_119_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_119_squeeze_mask_0 = const()[name = string("v_cache_119_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_119_cast_fp16 = slice_by_index(begin = v_cache_119_begin_0, end = v_cache_119_end_0, end_mask = v_cache_119_end_mask_0, squeeze_mask = v_cache_119_squeeze_mask_0, x = read_state_3)[name = string("v_cache_119_cast_fp16")]; int32 var_6299 = const()[name = string("op_6299"), val = int32(-1)]; tensor var_6317_axes_0 = const()[name = string("op_6317_axes_0"), val = tensor([-1])]; tensor blocks_29_attn_ln_weight_to_fp16 = const()[name = string("blocks_29_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1469775808)))]; tensor blocks_29_attn_ln_bias_to_fp16 = const()[name = string("blocks_29_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1469778432)))]; fp16 var_6305_to_fp16 = const()[name = string("op_6305_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_6317_cast_fp16 = layer_norm(axes = var_6317_axes_0, beta = blocks_29_attn_ln_bias_to_fp16, epsilon = var_6305_to_fp16, gamma = blocks_29_attn_ln_weight_to_fp16, x = x_525_cast_fp16)[name = string("op_6317_cast_fp16")]; tensor var_6328_to_fp16 = const()[name = string("op_6328_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1469781056)))]; tensor var_6329_to_fp16 = const()[name = string("op_6329_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1473057920)))]; tensor linear_232_cast_fp16 = linear(bias = var_6329_to_fp16, weight = var_6328_to_fp16, x = var_6317_cast_fp16)[name = string("linear_232_cast_fp16")]; tensor var_6332_to_fp16 = const()[name = string("op_6332_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1473060544)))]; tensor linear_233_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_6332_to_fp16, x = var_6317_cast_fp16)[name = string("linear_233_cast_fp16")]; tensor var_6336_to_fp16 = const()[name = string("op_6336_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1476337408)))]; tensor var_6337_to_fp16 = const()[name = string("op_6337_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1479614272)))]; tensor linear_234_cast_fp16 = linear(bias = var_6337_to_fp16, weight = var_6336_to_fp16, x = var_6317_cast_fp16)[name = string("linear_234_cast_fp16")]; tensor var_6339_shape_cast_fp16 = shape(x = linear_232_cast_fp16)[name = string("op_6339_shape_cast_fp16")]; int32 gather_350_axis_0 = const()[name = string("gather_350_axis_0"), val = int32(0)]; int32 gather_350_batch_dims_0 = const()[name = string("gather_350_batch_dims_0"), val = int32(0)]; bool gather_350_validate_indices_0 = const()[name = string("gather_350_validate_indices_0"), val = bool(false)]; string var_6339_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_6339_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_350_to_uint16 = const()[name = string("select_350_to_uint16"), val = uint16(1)]; tensor var_6339_shape_cast_fp16_to_uint16 = cast(dtype = var_6339_shape_cast_fp16_to_uint16_dtype_0, x = var_6339_shape_cast_fp16)[name = string("cast_332")]; uint16 gather_350_cast_uint16 = gather(axis = gather_350_axis_0, batch_dims = gather_350_batch_dims_0, indices = select_350_to_uint16, validate_indices = gather_350_validate_indices_0, x = var_6339_shape_cast_fp16_to_uint16)[name = string("gather_350_cast_uint16")]; string gather_350_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_350_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_350_cast_uint16_to_int32 = cast(dtype = gather_350_cast_uint16_to_int32_dtype_0, x = gather_350_cast_uint16)[name = string("cast_331")]; int32 end_step_61 = add(x = offset, y = gather_350_cast_uint16_to_int32)[name = string("end_step_61")]; tensor expand_dims_464 = const()[name = string("expand_dims_464"), val = tensor([0])]; tensor expand_dims_466 = const()[name = string("expand_dims_466"), val = tensor([0])]; tensor expand_dims_467_axes_0 = const()[name = string("expand_dims_467_axes_0"), val = tensor([0])]; tensor expand_dims_467 = expand_dims(axes = expand_dims_467_axes_0, x = end_step_61)[name = string("expand_dims_467")]; tensor concat_642_values0_0 = const()[name = string("concat_642_values0_0"), val = tensor([29])]; int32 concat_642_axis_0 = const()[name = string("concat_642_axis_0"), val = int32(0)]; bool concat_642_interleave_0 = const()[name = string("concat_642_interleave_0"), val = bool(false)]; tensor concat_642 = concat(axis = concat_642_axis_0, interleave = concat_642_interleave_0, values = (concat_642_values0_0, expand_dims_464, expand_dims_1, expand_dims_466))[name = string("concat_642")]; tensor concat_643_values0_0 = const()[name = string("concat_643_values0_0"), val = tensor([0])]; tensor concat_643_values1_0 = const()[name = string("concat_643_values1_0"), val = tensor([0])]; tensor concat_643_values3_0 = const()[name = string("concat_643_values3_0"), val = tensor([0])]; int32 concat_643_axis_0 = const()[name = string("concat_643_axis_0"), val = int32(0)]; bool concat_643_interleave_0 = const()[name = string("concat_643_interleave_0"), val = bool(false)]; tensor concat_643 = concat(axis = concat_643_axis_0, interleave = concat_643_interleave_0, values = (concat_643_values0_0, concat_643_values1_0, expand_dims_467, concat_643_values3_0))[name = string("concat_643")]; tensor k_cache1_internal_tensor_assign_30_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_30_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_30_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_30_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_30_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_30_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_30_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_30_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_30_cast_fp16 = slice_update(begin = concat_642, begin_mask = k_cache1_internal_tensor_assign_30_begin_mask_0, end = concat_643, end_mask = k_cache1_internal_tensor_assign_30_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_30_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_30_stride_0, update = linear_233_cast_fp16, x = coreml_update_state_120)[name = string("k_cache1_internal_tensor_assign_30_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_30_cast_fp16, input = k_cache1)[name = string("coreml_update_state_122_write_state")]; tensor coreml_update_state_122 = read_state(input = k_cache1)[name = string("coreml_update_state_122")]; tensor v_cache1_internal_tensor_assign_30_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_30_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_30_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_30_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_30_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_30_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_30_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_30_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_30_cast_fp16 = slice_update(begin = concat_642, begin_mask = v_cache1_internal_tensor_assign_30_begin_mask_0, end = concat_643, end_mask = v_cache1_internal_tensor_assign_30_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_30_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_30_stride_0, update = linear_234_cast_fp16, x = coreml_update_state_121)[name = string("v_cache1_internal_tensor_assign_30_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_30_cast_fp16, input = v_cache1)[name = string("coreml_update_state_123_write_state")]; tensor coreml_update_state_123 = read_state(input = v_cache1)[name = string("coreml_update_state_123")]; int32 concat_648_values0_0 = const()[name = string("concat_648_values0_0"), val = int32(1)]; int32 concat_648_values2_0 = const()[name = string("concat_648_values2_0"), val = int32(1280)]; int32 concat_648_axis_0 = const()[name = string("concat_648_axis_0"), val = int32(0)]; bool concat_648_interleave_0 = const()[name = string("concat_648_interleave_0"), val = bool(false)]; tensor concat_648 = concat(axis = concat_648_axis_0, interleave = concat_648_interleave_0, values = (concat_648_values0_0, end_step_61, concat_648_values2_0))[name = string("concat_648")]; tensor var_6355_begin_0 = const()[name = string("op_6355_begin_0"), val = tensor([0, 0, 0])]; tensor var_6355_end_mask_0 = const()[name = string("op_6355_end_mask_0"), val = tensor([true, false, true])]; tensor var_6355_cast_fp16 = slice_by_index(begin = var_6355_begin_0, end = concat_648, end_mask = var_6355_end_mask_0, x = k_cache_117_cast_fp16)[name = string("op_6355_cast_fp16")]; tensor var_6358_begin_0 = const()[name = string("op_6358_begin_0"), val = tensor([0, 0, 0])]; tensor var_6358_end_mask_0 = const()[name = string("op_6358_end_mask_0"), val = tensor([true, false, true])]; tensor var_6358_cast_fp16 = slice_by_index(begin = var_6358_begin_0, end = concat_648, end_mask = var_6358_end_mask_0, x = v_cache_117_cast_fp16)[name = string("op_6358_cast_fp16")]; tensor concat_650x = const()[name = string("concat_650x"), val = tensor([1, -1, 20, 64])]; tensor var_6368_cast_fp16 = reshape(shape = concat_650x, x = linear_232_cast_fp16)[name = string("op_6368_cast_fp16")]; tensor const_276_to_fp16 = const()[name = string("const_276_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_235_cast_fp16 = mul(x = var_6368_cast_fp16, y = const_276_to_fp16)[name = string("q_235_cast_fp16")]; tensor concat_651x = const()[name = string("concat_651x"), val = tensor([1, -1, 20, 64])]; tensor var_6375_cast_fp16 = reshape(shape = concat_651x, x = var_6355_cast_fp16)[name = string("op_6375_cast_fp16")]; tensor const_277_to_fp16 = const()[name = string("const_277_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_295_cast_fp16 = mul(x = var_6375_cast_fp16, y = const_277_to_fp16)[name = string("k_295_cast_fp16")]; tensor concat_652x = const()[name = string("concat_652x"), val = tensor([1, -1, 20, 64])]; tensor var_6382_cast_fp16 = reshape(shape = concat_652x, x = var_6358_cast_fp16)[name = string("op_6382_cast_fp16")]; tensor var_6383 = const()[name = string("op_6383"), val = tensor([0, 2, 1, 3])]; bool qk_175_transpose_x_0 = const()[name = string("qk_175_transpose_x_0"), val = bool(false)]; bool qk_175_transpose_y_0 = const()[name = string("qk_175_transpose_y_0"), val = bool(false)]; tensor transpose_373_perm_0 = const()[name = string("transpose_373_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_374_perm_0 = const()[name = string("transpose_374_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_374 = transpose(perm = transpose_374_perm_0, x = k_295_cast_fp16)[name = string("transpose_406")]; tensor transpose_373 = transpose(perm = transpose_373_perm_0, x = q_235_cast_fp16)[name = string("transpose_407")]; tensor qk_175_cast_fp16 = matmul(transpose_x = qk_175_transpose_x_0, transpose_y = qk_175_transpose_y_0, x = transpose_373, y = transpose_374)[name = string("qk_175_cast_fp16")]; int32 concat_653_values1_0 = const()[name = string("concat_653_values1_0"), val = int32(448)]; int32 concat_653_axis_0 = const()[name = string("concat_653_axis_0"), val = int32(0)]; bool concat_653_interleave_0 = const()[name = string("concat_653_interleave_0"), val = bool(false)]; tensor concat_653 = concat(axis = concat_653_axis_0, interleave = concat_653_interleave_0, values = (gather_350_cast_uint16_to_int32, concat_653_values1_0))[name = string("concat_653")]; tensor var_6386_begin_0 = const()[name = string("op_6386_begin_0"), val = tensor([0, 0])]; tensor var_6386_end_mask_0 = const()[name = string("op_6386_end_mask_0"), val = tensor([false, true])]; tensor var_6386_cast_fp16 = slice_by_index(begin = var_6386_begin_0, end = concat_653, end_mask = var_6386_end_mask_0, x = mask_to_fp16)[name = string("op_6386_cast_fp16")]; int32 concat_654_values0_0 = const()[name = string("concat_654_values0_0"), val = int32(0)]; int32 concat_654_axis_0 = const()[name = string("concat_654_axis_0"), val = int32(0)]; bool concat_654_interleave_0 = const()[name = string("concat_654_interleave_0"), val = bool(false)]; tensor concat_654 = concat(axis = concat_654_axis_0, interleave = concat_654_interleave_0, values = (concat_654_values0_0, gather_350_cast_uint16_to_int32))[name = string("concat_654")]; tensor var_6387_begin_0 = const()[name = string("op_6387_begin_0"), val = tensor([0, 0])]; tensor var_6387_end_mask_0 = const()[name = string("op_6387_end_mask_0"), val = tensor([true, false])]; tensor var_6387_cast_fp16 = slice_by_index(begin = var_6387_begin_0, end = concat_654, end_mask = var_6387_end_mask_0, x = var_6386_cast_fp16)[name = string("op_6387_cast_fp16")]; tensor qk_177_cast_fp16 = add(x = qk_175_cast_fp16, y = var_6387_cast_fp16)[name = string("qk_177_cast_fp16")]; tensor var_6390_cast_fp16 = softmax(axis = var_6299, x = qk_177_cast_fp16)[name = string("op_6390_cast_fp16")]; bool var_6392_transpose_x_0 = const()[name = string("op_6392_transpose_x_0"), val = bool(false)]; bool var_6392_transpose_y_0 = const()[name = string("op_6392_transpose_y_0"), val = bool(false)]; tensor v_295_cast_fp16 = transpose(perm = var_6383, x = var_6382_cast_fp16)[name = string("transpose_408")]; tensor var_6392_cast_fp16 = matmul(transpose_x = var_6392_transpose_x_0, transpose_y = var_6392_transpose_y_0, x = var_6390_cast_fp16, y = v_295_cast_fp16)[name = string("op_6392_cast_fp16")]; tensor var_6393 = const()[name = string("op_6393"), val = tensor([0, 2, 1, 3])]; tensor concat_655x = const()[name = string("concat_655x"), val = tensor([1, -1, 1280])]; tensor var_6394_cast_fp16 = transpose(perm = var_6393, x = var_6392_cast_fp16)[name = string("transpose_405")]; tensor x_529_cast_fp16 = reshape(shape = concat_655x, x = var_6394_cast_fp16)[name = string("x_529_cast_fp16")]; tensor var_6398_to_fp16 = const()[name = string("op_6398_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1479616896)))]; tensor var_6399_to_fp16 = const()[name = string("op_6399_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1482893760)))]; tensor linear_235_cast_fp16 = linear(bias = var_6399_to_fp16, weight = var_6398_to_fp16, x = x_529_cast_fp16)[name = string("linear_235_cast_fp16")]; tensor x_531_cast_fp16 = add(x = x_525_cast_fp16, y = linear_235_cast_fp16)[name = string("x_531_cast_fp16")]; tensor var_6406_axes_0 = const()[name = string("op_6406_axes_0"), val = tensor([-1])]; tensor blocks_29_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_29_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1482896384)))]; tensor blocks_29_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_29_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1482899008)))]; tensor var_6406_cast_fp16 = layer_norm(axes = var_6406_axes_0, beta = blocks_29_cross_attn_ln_bias_to_fp16, epsilon = var_6305_to_fp16, gamma = blocks_29_cross_attn_ln_weight_to_fp16, x = x_531_cast_fp16)[name = string("op_6406_cast_fp16")]; tensor var_6415_to_fp16 = const()[name = string("op_6415_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1482901632)))]; tensor var_6416_to_fp16 = const()[name = string("op_6416_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1486178496)))]; tensor linear_236_cast_fp16 = linear(bias = var_6416_to_fp16, weight = var_6415_to_fp16, x = var_6406_cast_fp16)[name = string("linear_236_cast_fp16")]; tensor concat_656 = const()[name = string("concat_656"), val = tensor([0, 0, 0])]; tensor concat_657 = const()[name = string("concat_657"), val = tensor([0, 1500, 0])]; tensor k_297_internal_tensor_assign_1_stride_0 = const()[name = string("k_297_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_297_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_297_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_297_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_297_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_297_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_297_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_297_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_656, begin_mask = k_297_internal_tensor_assign_1_begin_mask_0, end = concat_657, end_mask = k_297_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_297_internal_tensor_assign_1_squeeze_mask_0, stride = k_297_internal_tensor_assign_1_stride_0, update = k_cache_119_cast_fp16, x = k_7_to_fp16)[name = string("k_297_internal_tensor_assign_1_cast_fp16")]; tensor concat_658 = const()[name = string("concat_658"), val = tensor([0, 0, 0])]; tensor concat_659 = const()[name = string("concat_659"), val = tensor([0, 1500, 0])]; tensor v_297_internal_tensor_assign_1_stride_0 = const()[name = string("v_297_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_297_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_297_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_297_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_297_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_297_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_297_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_297_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_658, begin_mask = v_297_internal_tensor_assign_1_begin_mask_0, end = concat_659, end_mask = v_297_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_297_internal_tensor_assign_1_squeeze_mask_0, stride = v_297_internal_tensor_assign_1_stride_0, update = v_cache_119_cast_fp16, x = k_7_to_fp16)[name = string("v_297_internal_tensor_assign_1_cast_fp16")]; tensor concat_660x = const()[name = string("concat_660x"), val = tensor([1, -1, 20, 64])]; tensor var_6436_cast_fp16 = reshape(shape = concat_660x, x = linear_236_cast_fp16)[name = string("op_6436_cast_fp16")]; tensor const_278_to_fp16 = const()[name = string("const_278_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_239_cast_fp16 = mul(x = var_6436_cast_fp16, y = const_278_to_fp16)[name = string("q_239_cast_fp16")]; tensor var_6442 = const()[name = string("op_6442"), val = tensor([1, 1500, 20, -1])]; tensor var_6443_cast_fp16 = reshape(shape = var_6442, x = k_297_internal_tensor_assign_1_cast_fp16)[name = string("op_6443_cast_fp16")]; tensor const_279_to_fp16 = const()[name = string("const_279_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_299_cast_fp16 = mul(x = var_6443_cast_fp16, y = const_279_to_fp16)[name = string("k_299_cast_fp16")]; tensor var_6449 = const()[name = string("op_6449"), val = tensor([1, 1500, 20, -1])]; tensor var_6450_cast_fp16 = reshape(shape = var_6449, x = v_297_internal_tensor_assign_1_cast_fp16)[name = string("op_6450_cast_fp16")]; tensor var_6451 = const()[name = string("op_6451"), val = tensor([0, 2, 1, 3])]; bool qk_179_transpose_x_0 = const()[name = string("qk_179_transpose_x_0"), val = bool(false)]; bool qk_179_transpose_y_0 = const()[name = string("qk_179_transpose_y_0"), val = bool(false)]; tensor transpose_375_perm_0 = const()[name = string("transpose_375_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_376_perm_0 = const()[name = string("transpose_376_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_376 = transpose(perm = transpose_376_perm_0, x = k_299_cast_fp16)[name = string("transpose_402")]; tensor transpose_375 = transpose(perm = transpose_375_perm_0, x = q_239_cast_fp16)[name = string("transpose_403")]; tensor qk_179_cast_fp16 = matmul(transpose_x = qk_179_transpose_x_0, transpose_y = qk_179_transpose_y_0, x = transpose_375, y = transpose_376)[name = string("qk_179_cast_fp16")]; tensor var_6455_cast_fp16 = softmax(axis = var_6299, x = qk_179_cast_fp16)[name = string("op_6455_cast_fp16")]; bool var_6457_transpose_x_0 = const()[name = string("op_6457_transpose_x_0"), val = bool(false)]; bool var_6457_transpose_y_0 = const()[name = string("op_6457_transpose_y_0"), val = bool(false)]; tensor v_299_cast_fp16 = transpose(perm = var_6451, x = var_6450_cast_fp16)[name = string("transpose_404")]; tensor var_6457_cast_fp16 = matmul(transpose_x = var_6457_transpose_x_0, transpose_y = var_6457_transpose_y_0, x = var_6455_cast_fp16, y = v_299_cast_fp16)[name = string("op_6457_cast_fp16")]; tensor var_6458 = const()[name = string("op_6458"), val = tensor([0, 2, 1, 3])]; tensor concat_661x = const()[name = string("concat_661x"), val = tensor([1, -1, 1280])]; tensor var_6459_cast_fp16 = transpose(perm = var_6458, x = var_6457_cast_fp16)[name = string("transpose_401")]; tensor x_535_cast_fp16 = reshape(shape = concat_661x, x = var_6459_cast_fp16)[name = string("x_535_cast_fp16")]; tensor var_6463_to_fp16 = const()[name = string("op_6463_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1486181120)))]; tensor var_6464_to_fp16 = const()[name = string("op_6464_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1489457984)))]; tensor linear_237_cast_fp16 = linear(bias = var_6464_to_fp16, weight = var_6463_to_fp16, x = x_535_cast_fp16)[name = string("linear_237_cast_fp16")]; tensor x_537_cast_fp16 = add(x = x_531_cast_fp16, y = linear_237_cast_fp16)[name = string("x_537_cast_fp16")]; tensor var_6471_axes_0 = const()[name = string("op_6471_axes_0"), val = tensor([-1])]; tensor blocks_29_mlp_ln_weight_to_fp16 = const()[name = string("blocks_29_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1489460608)))]; tensor blocks_29_mlp_ln_bias_to_fp16 = const()[name = string("blocks_29_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1489463232)))]; tensor var_6471_cast_fp16 = layer_norm(axes = var_6471_axes_0, beta = blocks_29_mlp_ln_bias_to_fp16, epsilon = var_6305_to_fp16, gamma = blocks_29_mlp_ln_weight_to_fp16, x = x_537_cast_fp16)[name = string("op_6471_cast_fp16")]; tensor var_6480_to_fp16 = const()[name = string("op_6480_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1489465856)))]; tensor var_6481_to_fp16 = const()[name = string("op_6481_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1502573120)))]; tensor linear_238_cast_fp16 = linear(bias = var_6481_to_fp16, weight = var_6480_to_fp16, x = var_6471_cast_fp16)[name = string("linear_238_cast_fp16")]; string x_541_mode_0 = const()[name = string("x_541_mode_0"), val = string("EXACT")]; tensor x_541_cast_fp16 = gelu(mode = x_541_mode_0, x = linear_238_cast_fp16)[name = string("x_541_cast_fp16")]; tensor var_6486_to_fp16 = const()[name = string("op_6486_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1502583424)))]; tensor var_6487_to_fp16 = const()[name = string("op_6487_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1515690688)))]; tensor linear_239_cast_fp16 = linear(bias = var_6487_to_fp16, weight = var_6486_to_fp16, x = x_541_cast_fp16)[name = string("linear_239_cast_fp16")]; tensor x_543_cast_fp16 = add(x = x_537_cast_fp16, y = linear_239_cast_fp16)[name = string("x_543_cast_fp16")]; tensor k_cache_121_begin_0 = const()[name = string("k_cache_121_begin_0"), val = tensor([30, 0, 0, 0])]; tensor k_cache_121_end_0 = const()[name = string("k_cache_121_end_0"), val = tensor([31, 1, 448, 1280])]; tensor k_cache_121_end_mask_0 = const()[name = string("k_cache_121_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_121_squeeze_mask_0 = const()[name = string("k_cache_121_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_121_cast_fp16 = slice_by_index(begin = k_cache_121_begin_0, end = k_cache_121_end_0, end_mask = k_cache_121_end_mask_0, squeeze_mask = k_cache_121_squeeze_mask_0, x = coreml_update_state_122)[name = string("k_cache_121_cast_fp16")]; tensor v_cache_121_begin_0 = const()[name = string("v_cache_121_begin_0"), val = tensor([30, 0, 0, 0])]; tensor v_cache_121_end_0 = const()[name = string("v_cache_121_end_0"), val = tensor([31, 1, 448, 1280])]; tensor v_cache_121_end_mask_0 = const()[name = string("v_cache_121_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_121_squeeze_mask_0 = const()[name = string("v_cache_121_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_121_cast_fp16 = slice_by_index(begin = v_cache_121_begin_0, end = v_cache_121_end_0, end_mask = v_cache_121_end_mask_0, squeeze_mask = v_cache_121_squeeze_mask_0, x = coreml_update_state_123)[name = string("v_cache_121_cast_fp16")]; tensor k_cache_123_begin_0 = const()[name = string("k_cache_123_begin_0"), val = tensor([30, 0, 0, 0])]; tensor k_cache_123_end_0 = const()[name = string("k_cache_123_end_0"), val = tensor([31, 1, 1500, 1280])]; tensor k_cache_123_end_mask_0 = const()[name = string("k_cache_123_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_123_squeeze_mask_0 = const()[name = string("k_cache_123_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_123_cast_fp16 = slice_by_index(begin = k_cache_123_begin_0, end = k_cache_123_end_0, end_mask = k_cache_123_end_mask_0, squeeze_mask = k_cache_123_squeeze_mask_0, x = read_state_2)[name = string("k_cache_123_cast_fp16")]; tensor v_cache_123_begin_0 = const()[name = string("v_cache_123_begin_0"), val = tensor([30, 0, 0, 0])]; tensor v_cache_123_end_0 = const()[name = string("v_cache_123_end_0"), val = tensor([31, 1, 1500, 1280])]; tensor v_cache_123_end_mask_0 = const()[name = string("v_cache_123_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_123_squeeze_mask_0 = const()[name = string("v_cache_123_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_123_cast_fp16 = slice_by_index(begin = v_cache_123_begin_0, end = v_cache_123_end_0, end_mask = v_cache_123_end_mask_0, squeeze_mask = v_cache_123_squeeze_mask_0, x = read_state_3)[name = string("v_cache_123_cast_fp16")]; int32 var_6510 = const()[name = string("op_6510"), val = int32(-1)]; tensor var_6528_axes_0 = const()[name = string("op_6528_axes_0"), val = tensor([-1])]; tensor blocks_30_attn_ln_weight_to_fp16 = const()[name = string("blocks_30_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1515693312)))]; tensor blocks_30_attn_ln_bias_to_fp16 = const()[name = string("blocks_30_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1515695936)))]; fp16 var_6516_to_fp16 = const()[name = string("op_6516_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_6528_cast_fp16 = layer_norm(axes = var_6528_axes_0, beta = blocks_30_attn_ln_bias_to_fp16, epsilon = var_6516_to_fp16, gamma = blocks_30_attn_ln_weight_to_fp16, x = x_543_cast_fp16)[name = string("op_6528_cast_fp16")]; tensor var_6539_to_fp16 = const()[name = string("op_6539_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1515698560)))]; tensor var_6540_to_fp16 = const()[name = string("op_6540_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1518975424)))]; tensor linear_240_cast_fp16 = linear(bias = var_6540_to_fp16, weight = var_6539_to_fp16, x = var_6528_cast_fp16)[name = string("linear_240_cast_fp16")]; tensor var_6543_to_fp16 = const()[name = string("op_6543_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1518978048)))]; tensor linear_241_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_6543_to_fp16, x = var_6528_cast_fp16)[name = string("linear_241_cast_fp16")]; tensor var_6547_to_fp16 = const()[name = string("op_6547_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1522254912)))]; tensor var_6548_to_fp16 = const()[name = string("op_6548_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1525531776)))]; tensor linear_242_cast_fp16 = linear(bias = var_6548_to_fp16, weight = var_6547_to_fp16, x = var_6528_cast_fp16)[name = string("linear_242_cast_fp16")]; tensor var_6550_shape_cast_fp16 = shape(x = linear_240_cast_fp16)[name = string("op_6550_shape_cast_fp16")]; int32 gather_362_axis_0 = const()[name = string("gather_362_axis_0"), val = int32(0)]; int32 gather_362_batch_dims_0 = const()[name = string("gather_362_batch_dims_0"), val = int32(0)]; bool gather_362_validate_indices_0 = const()[name = string("gather_362_validate_indices_0"), val = bool(false)]; string var_6550_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_6550_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_362_to_uint16 = const()[name = string("select_362_to_uint16"), val = uint16(1)]; tensor var_6550_shape_cast_fp16_to_uint16 = cast(dtype = var_6550_shape_cast_fp16_to_uint16_dtype_0, x = var_6550_shape_cast_fp16)[name = string("cast_330")]; uint16 gather_362_cast_uint16 = gather(axis = gather_362_axis_0, batch_dims = gather_362_batch_dims_0, indices = select_362_to_uint16, validate_indices = gather_362_validate_indices_0, x = var_6550_shape_cast_fp16_to_uint16)[name = string("gather_362_cast_uint16")]; string gather_362_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_362_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_362_cast_uint16_to_int32 = cast(dtype = gather_362_cast_uint16_to_int32_dtype_0, x = gather_362_cast_uint16)[name = string("cast_329")]; int32 end_step_63 = add(x = offset, y = gather_362_cast_uint16_to_int32)[name = string("end_step_63")]; tensor expand_dims_480 = const()[name = string("expand_dims_480"), val = tensor([0])]; tensor expand_dims_482 = const()[name = string("expand_dims_482"), val = tensor([0])]; tensor expand_dims_483_axes_0 = const()[name = string("expand_dims_483_axes_0"), val = tensor([0])]; tensor expand_dims_483 = expand_dims(axes = expand_dims_483_axes_0, x = end_step_63)[name = string("expand_dims_483")]; tensor concat_664_values0_0 = const()[name = string("concat_664_values0_0"), val = tensor([30])]; int32 concat_664_axis_0 = const()[name = string("concat_664_axis_0"), val = int32(0)]; bool concat_664_interleave_0 = const()[name = string("concat_664_interleave_0"), val = bool(false)]; tensor concat_664 = concat(axis = concat_664_axis_0, interleave = concat_664_interleave_0, values = (concat_664_values0_0, expand_dims_480, expand_dims_1, expand_dims_482))[name = string("concat_664")]; tensor concat_665_values0_0 = const()[name = string("concat_665_values0_0"), val = tensor([0])]; tensor concat_665_values1_0 = const()[name = string("concat_665_values1_0"), val = tensor([0])]; tensor concat_665_values3_0 = const()[name = string("concat_665_values3_0"), val = tensor([0])]; int32 concat_665_axis_0 = const()[name = string("concat_665_axis_0"), val = int32(0)]; bool concat_665_interleave_0 = const()[name = string("concat_665_interleave_0"), val = bool(false)]; tensor concat_665 = concat(axis = concat_665_axis_0, interleave = concat_665_interleave_0, values = (concat_665_values0_0, concat_665_values1_0, expand_dims_483, concat_665_values3_0))[name = string("concat_665")]; tensor k_cache1_internal_tensor_assign_31_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_31_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_31_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_31_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_31_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_31_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_31_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_31_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_31_cast_fp16 = slice_update(begin = concat_664, begin_mask = k_cache1_internal_tensor_assign_31_begin_mask_0, end = concat_665, end_mask = k_cache1_internal_tensor_assign_31_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_31_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_31_stride_0, update = linear_241_cast_fp16, x = coreml_update_state_122)[name = string("k_cache1_internal_tensor_assign_31_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_31_cast_fp16, input = k_cache1)[name = string("coreml_update_state_124_write_state")]; tensor coreml_update_state_124 = read_state(input = k_cache1)[name = string("coreml_update_state_124")]; tensor v_cache1_internal_tensor_assign_31_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_31_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_31_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_31_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_31_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_31_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_31_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_31_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_31_cast_fp16 = slice_update(begin = concat_664, begin_mask = v_cache1_internal_tensor_assign_31_begin_mask_0, end = concat_665, end_mask = v_cache1_internal_tensor_assign_31_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_31_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_31_stride_0, update = linear_242_cast_fp16, x = coreml_update_state_123)[name = string("v_cache1_internal_tensor_assign_31_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_31_cast_fp16, input = v_cache1)[name = string("coreml_update_state_125_write_state")]; tensor coreml_update_state_125 = read_state(input = v_cache1)[name = string("coreml_update_state_125")]; int32 concat_670_values0_0 = const()[name = string("concat_670_values0_0"), val = int32(1)]; int32 concat_670_values2_0 = const()[name = string("concat_670_values2_0"), val = int32(1280)]; int32 concat_670_axis_0 = const()[name = string("concat_670_axis_0"), val = int32(0)]; bool concat_670_interleave_0 = const()[name = string("concat_670_interleave_0"), val = bool(false)]; tensor concat_670 = concat(axis = concat_670_axis_0, interleave = concat_670_interleave_0, values = (concat_670_values0_0, end_step_63, concat_670_values2_0))[name = string("concat_670")]; tensor var_6566_begin_0 = const()[name = string("op_6566_begin_0"), val = tensor([0, 0, 0])]; tensor var_6566_end_mask_0 = const()[name = string("op_6566_end_mask_0"), val = tensor([true, false, true])]; tensor var_6566_cast_fp16 = slice_by_index(begin = var_6566_begin_0, end = concat_670, end_mask = var_6566_end_mask_0, x = k_cache_121_cast_fp16)[name = string("op_6566_cast_fp16")]; tensor var_6569_begin_0 = const()[name = string("op_6569_begin_0"), val = tensor([0, 0, 0])]; tensor var_6569_end_mask_0 = const()[name = string("op_6569_end_mask_0"), val = tensor([true, false, true])]; tensor var_6569_cast_fp16 = slice_by_index(begin = var_6569_begin_0, end = concat_670, end_mask = var_6569_end_mask_0, x = v_cache_121_cast_fp16)[name = string("op_6569_cast_fp16")]; tensor concat_672x = const()[name = string("concat_672x"), val = tensor([1, -1, 20, 64])]; tensor var_6579_cast_fp16 = reshape(shape = concat_672x, x = linear_240_cast_fp16)[name = string("op_6579_cast_fp16")]; tensor const_280_to_fp16 = const()[name = string("const_280_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_243_cast_fp16 = mul(x = var_6579_cast_fp16, y = const_280_to_fp16)[name = string("q_243_cast_fp16")]; tensor concat_673x = const()[name = string("concat_673x"), val = tensor([1, -1, 20, 64])]; tensor var_6586_cast_fp16 = reshape(shape = concat_673x, x = var_6566_cast_fp16)[name = string("op_6586_cast_fp16")]; tensor const_281_to_fp16 = const()[name = string("const_281_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_305_cast_fp16 = mul(x = var_6586_cast_fp16, y = const_281_to_fp16)[name = string("k_305_cast_fp16")]; tensor concat_674x = const()[name = string("concat_674x"), val = tensor([1, -1, 20, 64])]; tensor var_6593_cast_fp16 = reshape(shape = concat_674x, x = var_6569_cast_fp16)[name = string("op_6593_cast_fp16")]; tensor var_6594 = const()[name = string("op_6594"), val = tensor([0, 2, 1, 3])]; bool qk_181_transpose_x_0 = const()[name = string("qk_181_transpose_x_0"), val = bool(false)]; bool qk_181_transpose_y_0 = const()[name = string("qk_181_transpose_y_0"), val = bool(false)]; tensor transpose_377_perm_0 = const()[name = string("transpose_377_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_378_perm_0 = const()[name = string("transpose_378_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_378 = transpose(perm = transpose_378_perm_0, x = k_305_cast_fp16)[name = string("transpose_398")]; tensor transpose_377 = transpose(perm = transpose_377_perm_0, x = q_243_cast_fp16)[name = string("transpose_399")]; tensor qk_181_cast_fp16 = matmul(transpose_x = qk_181_transpose_x_0, transpose_y = qk_181_transpose_y_0, x = transpose_377, y = transpose_378)[name = string("qk_181_cast_fp16")]; int32 concat_675_values1_0 = const()[name = string("concat_675_values1_0"), val = int32(448)]; int32 concat_675_axis_0 = const()[name = string("concat_675_axis_0"), val = int32(0)]; bool concat_675_interleave_0 = const()[name = string("concat_675_interleave_0"), val = bool(false)]; tensor concat_675 = concat(axis = concat_675_axis_0, interleave = concat_675_interleave_0, values = (gather_362_cast_uint16_to_int32, concat_675_values1_0))[name = string("concat_675")]; tensor var_6597_begin_0 = const()[name = string("op_6597_begin_0"), val = tensor([0, 0])]; tensor var_6597_end_mask_0 = const()[name = string("op_6597_end_mask_0"), val = tensor([false, true])]; tensor var_6597_cast_fp16 = slice_by_index(begin = var_6597_begin_0, end = concat_675, end_mask = var_6597_end_mask_0, x = mask_to_fp16)[name = string("op_6597_cast_fp16")]; int32 concat_676_values0_0 = const()[name = string("concat_676_values0_0"), val = int32(0)]; int32 concat_676_axis_0 = const()[name = string("concat_676_axis_0"), val = int32(0)]; bool concat_676_interleave_0 = const()[name = string("concat_676_interleave_0"), val = bool(false)]; tensor concat_676 = concat(axis = concat_676_axis_0, interleave = concat_676_interleave_0, values = (concat_676_values0_0, gather_362_cast_uint16_to_int32))[name = string("concat_676")]; tensor var_6598_begin_0 = const()[name = string("op_6598_begin_0"), val = tensor([0, 0])]; tensor var_6598_end_mask_0 = const()[name = string("op_6598_end_mask_0"), val = tensor([true, false])]; tensor var_6598_cast_fp16 = slice_by_index(begin = var_6598_begin_0, end = concat_676, end_mask = var_6598_end_mask_0, x = var_6597_cast_fp16)[name = string("op_6598_cast_fp16")]; tensor qk_183_cast_fp16 = add(x = qk_181_cast_fp16, y = var_6598_cast_fp16)[name = string("qk_183_cast_fp16")]; tensor var_6601_cast_fp16 = softmax(axis = var_6510, x = qk_183_cast_fp16)[name = string("op_6601_cast_fp16")]; bool var_6603_transpose_x_0 = const()[name = string("op_6603_transpose_x_0"), val = bool(false)]; bool var_6603_transpose_y_0 = const()[name = string("op_6603_transpose_y_0"), val = bool(false)]; tensor v_305_cast_fp16 = transpose(perm = var_6594, x = var_6593_cast_fp16)[name = string("transpose_400")]; tensor var_6603_cast_fp16 = matmul(transpose_x = var_6603_transpose_x_0, transpose_y = var_6603_transpose_y_0, x = var_6601_cast_fp16, y = v_305_cast_fp16)[name = string("op_6603_cast_fp16")]; tensor var_6604 = const()[name = string("op_6604"), val = tensor([0, 2, 1, 3])]; tensor concat_677x = const()[name = string("concat_677x"), val = tensor([1, -1, 1280])]; tensor var_6605_cast_fp16 = transpose(perm = var_6604, x = var_6603_cast_fp16)[name = string("transpose_397")]; tensor x_547_cast_fp16 = reshape(shape = concat_677x, x = var_6605_cast_fp16)[name = string("x_547_cast_fp16")]; tensor var_6609_to_fp16 = const()[name = string("op_6609_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1525534400)))]; tensor var_6610_to_fp16 = const()[name = string("op_6610_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1528811264)))]; tensor linear_243_cast_fp16 = linear(bias = var_6610_to_fp16, weight = var_6609_to_fp16, x = x_547_cast_fp16)[name = string("linear_243_cast_fp16")]; tensor x_549_cast_fp16 = add(x = x_543_cast_fp16, y = linear_243_cast_fp16)[name = string("x_549_cast_fp16")]; tensor var_6617_axes_0 = const()[name = string("op_6617_axes_0"), val = tensor([-1])]; tensor blocks_30_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_30_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1528813888)))]; tensor blocks_30_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_30_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1528816512)))]; tensor var_6617_cast_fp16 = layer_norm(axes = var_6617_axes_0, beta = blocks_30_cross_attn_ln_bias_to_fp16, epsilon = var_6516_to_fp16, gamma = blocks_30_cross_attn_ln_weight_to_fp16, x = x_549_cast_fp16)[name = string("op_6617_cast_fp16")]; tensor var_6626_to_fp16 = const()[name = string("op_6626_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1528819136)))]; tensor var_6627_to_fp16 = const()[name = string("op_6627_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1532096000)))]; tensor linear_244_cast_fp16 = linear(bias = var_6627_to_fp16, weight = var_6626_to_fp16, x = var_6617_cast_fp16)[name = string("linear_244_cast_fp16")]; tensor concat_678 = const()[name = string("concat_678"), val = tensor([0, 0, 0])]; tensor concat_679 = const()[name = string("concat_679"), val = tensor([0, 1500, 0])]; tensor k_307_internal_tensor_assign_1_stride_0 = const()[name = string("k_307_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_307_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_307_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_307_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_307_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_307_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_307_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_307_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_678, begin_mask = k_307_internal_tensor_assign_1_begin_mask_0, end = concat_679, end_mask = k_307_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_307_internal_tensor_assign_1_squeeze_mask_0, stride = k_307_internal_tensor_assign_1_stride_0, update = k_cache_123_cast_fp16, x = k_7_to_fp16)[name = string("k_307_internal_tensor_assign_1_cast_fp16")]; tensor concat_680 = const()[name = string("concat_680"), val = tensor([0, 0, 0])]; tensor concat_681 = const()[name = string("concat_681"), val = tensor([0, 1500, 0])]; tensor v_307_internal_tensor_assign_1_stride_0 = const()[name = string("v_307_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_307_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_307_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_307_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_307_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_307_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_307_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_307_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_680, begin_mask = v_307_internal_tensor_assign_1_begin_mask_0, end = concat_681, end_mask = v_307_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_307_internal_tensor_assign_1_squeeze_mask_0, stride = v_307_internal_tensor_assign_1_stride_0, update = v_cache_123_cast_fp16, x = k_7_to_fp16)[name = string("v_307_internal_tensor_assign_1_cast_fp16")]; tensor concat_682x = const()[name = string("concat_682x"), val = tensor([1, -1, 20, 64])]; tensor var_6647_cast_fp16 = reshape(shape = concat_682x, x = linear_244_cast_fp16)[name = string("op_6647_cast_fp16")]; tensor const_282_to_fp16 = const()[name = string("const_282_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_247_cast_fp16 = mul(x = var_6647_cast_fp16, y = const_282_to_fp16)[name = string("q_247_cast_fp16")]; tensor var_6653 = const()[name = string("op_6653"), val = tensor([1, 1500, 20, -1])]; tensor var_6654_cast_fp16 = reshape(shape = var_6653, x = k_307_internal_tensor_assign_1_cast_fp16)[name = string("op_6654_cast_fp16")]; tensor const_283_to_fp16 = const()[name = string("const_283_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_309_cast_fp16 = mul(x = var_6654_cast_fp16, y = const_283_to_fp16)[name = string("k_309_cast_fp16")]; tensor var_6660 = const()[name = string("op_6660"), val = tensor([1, 1500, 20, -1])]; tensor var_6661_cast_fp16 = reshape(shape = var_6660, x = v_307_internal_tensor_assign_1_cast_fp16)[name = string("op_6661_cast_fp16")]; tensor var_6662 = const()[name = string("op_6662"), val = tensor([0, 2, 1, 3])]; bool qk_185_transpose_x_0 = const()[name = string("qk_185_transpose_x_0"), val = bool(false)]; bool qk_185_transpose_y_0 = const()[name = string("qk_185_transpose_y_0"), val = bool(false)]; tensor transpose_379_perm_0 = const()[name = string("transpose_379_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_380_perm_0 = const()[name = string("transpose_380_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_380 = transpose(perm = transpose_380_perm_0, x = k_309_cast_fp16)[name = string("transpose_394")]; tensor transpose_379 = transpose(perm = transpose_379_perm_0, x = q_247_cast_fp16)[name = string("transpose_395")]; tensor qk_185_cast_fp16 = matmul(transpose_x = qk_185_transpose_x_0, transpose_y = qk_185_transpose_y_0, x = transpose_379, y = transpose_380)[name = string("qk_185_cast_fp16")]; tensor var_6666_cast_fp16 = softmax(axis = var_6510, x = qk_185_cast_fp16)[name = string("op_6666_cast_fp16")]; bool var_6668_transpose_x_0 = const()[name = string("op_6668_transpose_x_0"), val = bool(false)]; bool var_6668_transpose_y_0 = const()[name = string("op_6668_transpose_y_0"), val = bool(false)]; tensor v_309_cast_fp16 = transpose(perm = var_6662, x = var_6661_cast_fp16)[name = string("transpose_396")]; tensor var_6668_cast_fp16 = matmul(transpose_x = var_6668_transpose_x_0, transpose_y = var_6668_transpose_y_0, x = var_6666_cast_fp16, y = v_309_cast_fp16)[name = string("op_6668_cast_fp16")]; tensor var_6669 = const()[name = string("op_6669"), val = tensor([0, 2, 1, 3])]; tensor concat_683x = const()[name = string("concat_683x"), val = tensor([1, -1, 1280])]; tensor var_6670_cast_fp16 = transpose(perm = var_6669, x = var_6668_cast_fp16)[name = string("transpose_393")]; tensor x_553_cast_fp16 = reshape(shape = concat_683x, x = var_6670_cast_fp16)[name = string("x_553_cast_fp16")]; tensor var_6674_to_fp16 = const()[name = string("op_6674_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1532098624)))]; tensor var_6675_to_fp16 = const()[name = string("op_6675_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1535375488)))]; tensor linear_245_cast_fp16 = linear(bias = var_6675_to_fp16, weight = var_6674_to_fp16, x = x_553_cast_fp16)[name = string("linear_245_cast_fp16")]; tensor x_555_cast_fp16 = add(x = x_549_cast_fp16, y = linear_245_cast_fp16)[name = string("x_555_cast_fp16")]; tensor var_6682_axes_0 = const()[name = string("op_6682_axes_0"), val = tensor([-1])]; tensor blocks_30_mlp_ln_weight_to_fp16 = const()[name = string("blocks_30_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1535378112)))]; tensor blocks_30_mlp_ln_bias_to_fp16 = const()[name = string("blocks_30_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1535380736)))]; tensor var_6682_cast_fp16 = layer_norm(axes = var_6682_axes_0, beta = blocks_30_mlp_ln_bias_to_fp16, epsilon = var_6516_to_fp16, gamma = blocks_30_mlp_ln_weight_to_fp16, x = x_555_cast_fp16)[name = string("op_6682_cast_fp16")]; tensor var_6691_to_fp16 = const()[name = string("op_6691_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1535383360)))]; tensor var_6692_to_fp16 = const()[name = string("op_6692_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1548490624)))]; tensor linear_246_cast_fp16 = linear(bias = var_6692_to_fp16, weight = var_6691_to_fp16, x = var_6682_cast_fp16)[name = string("linear_246_cast_fp16")]; string x_559_mode_0 = const()[name = string("x_559_mode_0"), val = string("EXACT")]; tensor x_559_cast_fp16 = gelu(mode = x_559_mode_0, x = linear_246_cast_fp16)[name = string("x_559_cast_fp16")]; tensor var_6697_to_fp16 = const()[name = string("op_6697_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1548500928)))]; tensor var_6698_to_fp16 = const()[name = string("op_6698_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1561608192)))]; tensor linear_247_cast_fp16 = linear(bias = var_6698_to_fp16, weight = var_6697_to_fp16, x = x_559_cast_fp16)[name = string("linear_247_cast_fp16")]; tensor x_561_cast_fp16 = add(x = x_555_cast_fp16, y = linear_247_cast_fp16)[name = string("x_561_cast_fp16")]; tensor k_cache_125_begin_0 = const()[name = string("k_cache_125_begin_0"), val = tensor([31, 0, 0, 0])]; tensor k_cache_125_end_0 = const()[name = string("k_cache_125_end_0"), val = tensor([32, 1, 448, 1280])]; tensor k_cache_125_end_mask_0 = const()[name = string("k_cache_125_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_125_squeeze_mask_0 = const()[name = string("k_cache_125_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_125_cast_fp16 = slice_by_index(begin = k_cache_125_begin_0, end = k_cache_125_end_0, end_mask = k_cache_125_end_mask_0, squeeze_mask = k_cache_125_squeeze_mask_0, x = coreml_update_state_124)[name = string("k_cache_125_cast_fp16")]; tensor v_cache_125_begin_0 = const()[name = string("v_cache_125_begin_0"), val = tensor([31, 0, 0, 0])]; tensor v_cache_125_end_0 = const()[name = string("v_cache_125_end_0"), val = tensor([32, 1, 448, 1280])]; tensor v_cache_125_end_mask_0 = const()[name = string("v_cache_125_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_125_squeeze_mask_0 = const()[name = string("v_cache_125_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_125_cast_fp16 = slice_by_index(begin = v_cache_125_begin_0, end = v_cache_125_end_0, end_mask = v_cache_125_end_mask_0, squeeze_mask = v_cache_125_squeeze_mask_0, x = coreml_update_state_125)[name = string("v_cache_125_cast_fp16")]; tensor k_cache_begin_0 = const()[name = string("k_cache_begin_0"), val = tensor([31, 0, 0, 0])]; tensor k_cache_end_0 = const()[name = string("k_cache_end_0"), val = tensor([32, 1, 1500, 1280])]; tensor k_cache_end_mask_0 = const()[name = string("k_cache_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_squeeze_mask_0 = const()[name = string("k_cache_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_cast_fp16 = slice_by_index(begin = k_cache_begin_0, end = k_cache_end_0, end_mask = k_cache_end_mask_0, squeeze_mask = k_cache_squeeze_mask_0, x = read_state_2)[name = string("k_cache_cast_fp16")]; tensor v_cache_begin_0 = const()[name = string("v_cache_begin_0"), val = tensor([31, 0, 0, 0])]; tensor v_cache_end_0 = const()[name = string("v_cache_end_0"), val = tensor([32, 1, 1500, 1280])]; tensor v_cache_end_mask_0 = const()[name = string("v_cache_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_squeeze_mask_0 = const()[name = string("v_cache_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_cast_fp16 = slice_by_index(begin = v_cache_begin_0, end = v_cache_end_0, end_mask = v_cache_end_mask_0, squeeze_mask = v_cache_squeeze_mask_0, x = read_state_3)[name = string("v_cache_cast_fp16")]; int32 var_6721 = const()[name = string("op_6721"), val = int32(-1)]; tensor var_6739_axes_0 = const()[name = string("op_6739_axes_0"), val = tensor([-1])]; tensor blocks_31_attn_ln_weight_to_fp16 = const()[name = string("blocks_31_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1561610816)))]; tensor blocks_31_attn_ln_bias_to_fp16 = const()[name = string("blocks_31_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1561613440)))]; fp16 var_6727_to_fp16 = const()[name = string("op_6727_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_6739_cast_fp16 = layer_norm(axes = var_6739_axes_0, beta = blocks_31_attn_ln_bias_to_fp16, epsilon = var_6727_to_fp16, gamma = blocks_31_attn_ln_weight_to_fp16, x = x_561_cast_fp16)[name = string("op_6739_cast_fp16")]; tensor var_6750_to_fp16 = const()[name = string("op_6750_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1561616064)))]; tensor var_6751_to_fp16 = const()[name = string("op_6751_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1564892928)))]; tensor linear_248_cast_fp16 = linear(bias = var_6751_to_fp16, weight = var_6750_to_fp16, x = var_6739_cast_fp16)[name = string("linear_248_cast_fp16")]; tensor var_6754_to_fp16 = const()[name = string("op_6754_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1564895552)))]; tensor linear_249_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_6754_to_fp16, x = var_6739_cast_fp16)[name = string("linear_249_cast_fp16")]; tensor var_6758_to_fp16 = const()[name = string("op_6758_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1568172416)))]; tensor var_6759_to_fp16 = const()[name = string("op_6759_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1571449280)))]; tensor linear_250_cast_fp16 = linear(bias = var_6759_to_fp16, weight = var_6758_to_fp16, x = var_6739_cast_fp16)[name = string("linear_250_cast_fp16")]; tensor var_6761_shape_cast_fp16 = shape(x = linear_248_cast_fp16)[name = string("op_6761_shape_cast_fp16")]; int32 gather_374_axis_0 = const()[name = string("gather_374_axis_0"), val = int32(0)]; int32 gather_374_batch_dims_0 = const()[name = string("gather_374_batch_dims_0"), val = int32(0)]; bool gather_374_validate_indices_0 = const()[name = string("gather_374_validate_indices_0"), val = bool(false)]; string var_6761_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_6761_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_374_to_uint16 = const()[name = string("select_374_to_uint16"), val = uint16(1)]; tensor var_6761_shape_cast_fp16_to_uint16 = cast(dtype = var_6761_shape_cast_fp16_to_uint16_dtype_0, x = var_6761_shape_cast_fp16)[name = string("cast_328")]; uint16 gather_374_cast_uint16 = gather(axis = gather_374_axis_0, batch_dims = gather_374_batch_dims_0, indices = select_374_to_uint16, validate_indices = gather_374_validate_indices_0, x = var_6761_shape_cast_fp16_to_uint16)[name = string("gather_374_cast_uint16")]; string gather_374_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_374_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_374_cast_uint16_to_int32 = cast(dtype = gather_374_cast_uint16_to_int32_dtype_0, x = gather_374_cast_uint16)[name = string("cast_327")]; int32 end_step = add(x = offset, y = gather_374_cast_uint16_to_int32)[name = string("end_step")]; tensor expand_dims_496 = const()[name = string("expand_dims_496"), val = tensor([0])]; tensor expand_dims_498 = const()[name = string("expand_dims_498"), val = tensor([0])]; tensor expand_dims_499_axes_0 = const()[name = string("expand_dims_499_axes_0"), val = tensor([0])]; tensor expand_dims_499 = expand_dims(axes = expand_dims_499_axes_0, x = end_step)[name = string("expand_dims_499")]; tensor concat_686_values0_0 = const()[name = string("concat_686_values0_0"), val = tensor([31])]; int32 concat_686_axis_0 = const()[name = string("concat_686_axis_0"), val = int32(0)]; bool concat_686_interleave_0 = const()[name = string("concat_686_interleave_0"), val = bool(false)]; tensor concat_686 = concat(axis = concat_686_axis_0, interleave = concat_686_interleave_0, values = (concat_686_values0_0, expand_dims_496, expand_dims_1, expand_dims_498))[name = string("concat_686")]; tensor concat_687_values0_0 = const()[name = string("concat_687_values0_0"), val = tensor([0])]; tensor concat_687_values1_0 = const()[name = string("concat_687_values1_0"), val = tensor([0])]; tensor concat_687_values3_0 = const()[name = string("concat_687_values3_0"), val = tensor([0])]; int32 concat_687_axis_0 = const()[name = string("concat_687_axis_0"), val = int32(0)]; bool concat_687_interleave_0 = const()[name = string("concat_687_interleave_0"), val = bool(false)]; tensor concat_687 = concat(axis = concat_687_axis_0, interleave = concat_687_interleave_0, values = (concat_687_values0_0, concat_687_values1_0, expand_dims_499, concat_687_values3_0))[name = string("concat_687")]; tensor k_cache1_internal_tensor_assign_32_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_32_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_32_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_32_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_32_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_32_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_32_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_32_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_32_cast_fp16 = slice_update(begin = concat_686, begin_mask = k_cache1_internal_tensor_assign_32_begin_mask_0, end = concat_687, end_mask = k_cache1_internal_tensor_assign_32_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_32_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_32_stride_0, update = linear_249_cast_fp16, x = coreml_update_state_124)[name = string("k_cache1_internal_tensor_assign_32_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_32_cast_fp16, input = k_cache1)[name = string("coreml_update_state_126_write_state")]; tensor v_cache1_internal_tensor_assign_32_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_32_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_32_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_32_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_32_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_32_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_32_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_32_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_32_cast_fp16 = slice_update(begin = concat_686, begin_mask = v_cache1_internal_tensor_assign_32_begin_mask_0, end = concat_687, end_mask = v_cache1_internal_tensor_assign_32_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_32_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_32_stride_0, update = linear_250_cast_fp16, x = coreml_update_state_125)[name = string("v_cache1_internal_tensor_assign_32_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_32_cast_fp16, input = v_cache1)[name = string("coreml_update_state_127_write_state")]; int32 concat_692_values0_0 = const()[name = string("concat_692_values0_0"), val = int32(1)]; int32 concat_692_values2_0 = const()[name = string("concat_692_values2_0"), val = int32(1280)]; int32 concat_692_axis_0 = const()[name = string("concat_692_axis_0"), val = int32(0)]; bool concat_692_interleave_0 = const()[name = string("concat_692_interleave_0"), val = bool(false)]; tensor concat_692 = concat(axis = concat_692_axis_0, interleave = concat_692_interleave_0, values = (concat_692_values0_0, end_step, concat_692_values2_0))[name = string("concat_692")]; tensor var_6777_begin_0 = const()[name = string("op_6777_begin_0"), val = tensor([0, 0, 0])]; tensor var_6777_end_mask_0 = const()[name = string("op_6777_end_mask_0"), val = tensor([true, false, true])]; tensor var_6777_cast_fp16 = slice_by_index(begin = var_6777_begin_0, end = concat_692, end_mask = var_6777_end_mask_0, x = k_cache_125_cast_fp16)[name = string("op_6777_cast_fp16")]; tensor var_6780_begin_0 = const()[name = string("op_6780_begin_0"), val = tensor([0, 0, 0])]; tensor var_6780_end_mask_0 = const()[name = string("op_6780_end_mask_0"), val = tensor([true, false, true])]; tensor var_6780_cast_fp16 = slice_by_index(begin = var_6780_begin_0, end = concat_692, end_mask = var_6780_end_mask_0, x = v_cache_125_cast_fp16)[name = string("op_6780_cast_fp16")]; tensor concat_694x = const()[name = string("concat_694x"), val = tensor([1, -1, 20, 64])]; tensor var_6790_cast_fp16 = reshape(shape = concat_694x, x = linear_248_cast_fp16)[name = string("op_6790_cast_fp16")]; tensor const_284_to_fp16 = const()[name = string("const_284_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_251_cast_fp16 = mul(x = var_6790_cast_fp16, y = const_284_to_fp16)[name = string("q_251_cast_fp16")]; tensor concat_695x = const()[name = string("concat_695x"), val = tensor([1, -1, 20, 64])]; tensor var_6797_cast_fp16 = reshape(shape = concat_695x, x = var_6777_cast_fp16)[name = string("op_6797_cast_fp16")]; tensor const_285_to_fp16 = const()[name = string("const_285_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_315_cast_fp16 = mul(x = var_6797_cast_fp16, y = const_285_to_fp16)[name = string("k_315_cast_fp16")]; tensor concat_696x = const()[name = string("concat_696x"), val = tensor([1, -1, 20, 64])]; tensor var_6804_cast_fp16 = reshape(shape = concat_696x, x = var_6780_cast_fp16)[name = string("op_6804_cast_fp16")]; tensor var_6805 = const()[name = string("op_6805"), val = tensor([0, 2, 1, 3])]; bool qk_187_transpose_x_0 = const()[name = string("qk_187_transpose_x_0"), val = bool(false)]; bool qk_187_transpose_y_0 = const()[name = string("qk_187_transpose_y_0"), val = bool(false)]; tensor transpose_381_perm_0 = const()[name = string("transpose_381_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_382_perm_0 = const()[name = string("transpose_382_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_382 = transpose(perm = transpose_382_perm_0, x = k_315_cast_fp16)[name = string("transpose_390")]; tensor transpose_381 = transpose(perm = transpose_381_perm_0, x = q_251_cast_fp16)[name = string("transpose_391")]; tensor qk_187_cast_fp16 = matmul(transpose_x = qk_187_transpose_x_0, transpose_y = qk_187_transpose_y_0, x = transpose_381, y = transpose_382)[name = string("qk_187_cast_fp16")]; int32 concat_697_values1_0 = const()[name = string("concat_697_values1_0"), val = int32(448)]; int32 concat_697_axis_0 = const()[name = string("concat_697_axis_0"), val = int32(0)]; bool concat_697_interleave_0 = const()[name = string("concat_697_interleave_0"), val = bool(false)]; tensor concat_697 = concat(axis = concat_697_axis_0, interleave = concat_697_interleave_0, values = (gather_374_cast_uint16_to_int32, concat_697_values1_0))[name = string("concat_697")]; tensor var_6808_begin_0 = const()[name = string("op_6808_begin_0"), val = tensor([0, 0])]; tensor var_6808_end_mask_0 = const()[name = string("op_6808_end_mask_0"), val = tensor([false, true])]; tensor var_6808_cast_fp16 = slice_by_index(begin = var_6808_begin_0, end = concat_697, end_mask = var_6808_end_mask_0, x = mask_to_fp16)[name = string("op_6808_cast_fp16")]; int32 concat_698_values0_0 = const()[name = string("concat_698_values0_0"), val = int32(0)]; int32 concat_698_axis_0 = const()[name = string("concat_698_axis_0"), val = int32(0)]; bool concat_698_interleave_0 = const()[name = string("concat_698_interleave_0"), val = bool(false)]; tensor concat_698 = concat(axis = concat_698_axis_0, interleave = concat_698_interleave_0, values = (concat_698_values0_0, gather_374_cast_uint16_to_int32))[name = string("concat_698")]; tensor var_6809_begin_0 = const()[name = string("op_6809_begin_0"), val = tensor([0, 0])]; tensor var_6809_end_mask_0 = const()[name = string("op_6809_end_mask_0"), val = tensor([true, false])]; tensor var_6809_cast_fp16 = slice_by_index(begin = var_6809_begin_0, end = concat_698, end_mask = var_6809_end_mask_0, x = var_6808_cast_fp16)[name = string("op_6809_cast_fp16")]; tensor qk_189_cast_fp16 = add(x = qk_187_cast_fp16, y = var_6809_cast_fp16)[name = string("qk_189_cast_fp16")]; tensor var_6812_cast_fp16 = softmax(axis = var_6721, x = qk_189_cast_fp16)[name = string("op_6812_cast_fp16")]; bool var_6814_transpose_x_0 = const()[name = string("op_6814_transpose_x_0"), val = bool(false)]; bool var_6814_transpose_y_0 = const()[name = string("op_6814_transpose_y_0"), val = bool(false)]; tensor v_315_cast_fp16 = transpose(perm = var_6805, x = var_6804_cast_fp16)[name = string("transpose_392")]; tensor var_6814_cast_fp16 = matmul(transpose_x = var_6814_transpose_x_0, transpose_y = var_6814_transpose_y_0, x = var_6812_cast_fp16, y = v_315_cast_fp16)[name = string("op_6814_cast_fp16")]; tensor var_6815 = const()[name = string("op_6815"), val = tensor([0, 2, 1, 3])]; tensor concat_699x = const()[name = string("concat_699x"), val = tensor([1, -1, 1280])]; tensor var_6816_cast_fp16 = transpose(perm = var_6815, x = var_6814_cast_fp16)[name = string("transpose_389")]; tensor x_565_cast_fp16 = reshape(shape = concat_699x, x = var_6816_cast_fp16)[name = string("x_565_cast_fp16")]; tensor var_6820_to_fp16 = const()[name = string("op_6820_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1571451904)))]; tensor var_6821_to_fp16 = const()[name = string("op_6821_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1574728768)))]; tensor linear_251_cast_fp16 = linear(bias = var_6821_to_fp16, weight = var_6820_to_fp16, x = x_565_cast_fp16)[name = string("linear_251_cast_fp16")]; tensor x_567_cast_fp16 = add(x = x_561_cast_fp16, y = linear_251_cast_fp16)[name = string("x_567_cast_fp16")]; tensor var_6828_axes_0 = const()[name = string("op_6828_axes_0"), val = tensor([-1])]; tensor blocks_31_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_31_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1574731392)))]; tensor blocks_31_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_31_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1574734016)))]; tensor var_6828_cast_fp16 = layer_norm(axes = var_6828_axes_0, beta = blocks_31_cross_attn_ln_bias_to_fp16, epsilon = var_6727_to_fp16, gamma = blocks_31_cross_attn_ln_weight_to_fp16, x = x_567_cast_fp16)[name = string("op_6828_cast_fp16")]; tensor var_6837_to_fp16 = const()[name = string("op_6837_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1574736640)))]; tensor var_6838_to_fp16 = const()[name = string("op_6838_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1578013504)))]; tensor linear_252_cast_fp16 = linear(bias = var_6838_to_fp16, weight = var_6837_to_fp16, x = var_6828_cast_fp16)[name = string("linear_252_cast_fp16")]; tensor concat_700 = const()[name = string("concat_700"), val = tensor([0, 0, 0])]; tensor concat_701 = const()[name = string("concat_701"), val = tensor([0, 1500, 0])]; tensor k_317_internal_tensor_assign_1_stride_0 = const()[name = string("k_317_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_317_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_317_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_317_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_317_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_317_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_317_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_317_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_700, begin_mask = k_317_internal_tensor_assign_1_begin_mask_0, end = concat_701, end_mask = k_317_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_317_internal_tensor_assign_1_squeeze_mask_0, stride = k_317_internal_tensor_assign_1_stride_0, update = k_cache_cast_fp16, x = k_7_to_fp16)[name = string("k_317_internal_tensor_assign_1_cast_fp16")]; tensor concat_702 = const()[name = string("concat_702"), val = tensor([0, 0, 0])]; tensor concat_703 = const()[name = string("concat_703"), val = tensor([0, 1500, 0])]; tensor v_317_internal_tensor_assign_1_stride_0 = const()[name = string("v_317_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_317_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_317_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_317_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_317_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_317_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_317_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_317_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_702, begin_mask = v_317_internal_tensor_assign_1_begin_mask_0, end = concat_703, end_mask = v_317_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_317_internal_tensor_assign_1_squeeze_mask_0, stride = v_317_internal_tensor_assign_1_stride_0, update = v_cache_cast_fp16, x = k_7_to_fp16)[name = string("v_317_internal_tensor_assign_1_cast_fp16")]; tensor concat_704x = const()[name = string("concat_704x"), val = tensor([1, -1, 20, 64])]; tensor var_6858_cast_fp16 = reshape(shape = concat_704x, x = linear_252_cast_fp16)[name = string("op_6858_cast_fp16")]; tensor const_286_to_fp16 = const()[name = string("const_286_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_cast_fp16 = mul(x = var_6858_cast_fp16, y = const_286_to_fp16)[name = string("q_cast_fp16")]; tensor var_6864 = const()[name = string("op_6864"), val = tensor([1, 1500, 20, -1])]; tensor var_6865_cast_fp16 = reshape(shape = var_6864, x = k_317_internal_tensor_assign_1_cast_fp16)[name = string("op_6865_cast_fp16")]; tensor const_287_to_fp16 = const()[name = string("const_287_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_cast_fp16 = mul(x = var_6865_cast_fp16, y = const_287_to_fp16)[name = string("k_cast_fp16")]; tensor var_6871 = const()[name = string("op_6871"), val = tensor([1, 1500, 20, -1])]; tensor var_6872_cast_fp16 = reshape(shape = var_6871, x = v_317_internal_tensor_assign_1_cast_fp16)[name = string("op_6872_cast_fp16")]; tensor var_6873 = const()[name = string("op_6873"), val = tensor([0, 2, 1, 3])]; bool qk_transpose_x_0 = const()[name = string("qk_transpose_x_0"), val = bool(false)]; bool qk_transpose_y_0 = const()[name = string("qk_transpose_y_0"), val = bool(false)]; tensor transpose_383_perm_0 = const()[name = string("transpose_383_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_384_perm_0 = const()[name = string("transpose_384_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_384 = transpose(perm = transpose_384_perm_0, x = k_cast_fp16)[name = string("transpose_386")]; tensor transpose_383 = transpose(perm = transpose_383_perm_0, x = q_cast_fp16)[name = string("transpose_387")]; tensor qk_cast_fp16 = matmul(transpose_x = qk_transpose_x_0, transpose_y = qk_transpose_y_0, x = transpose_383, y = transpose_384)[name = string("qk_cast_fp16")]; tensor var_6877_cast_fp16 = softmax(axis = var_6721, x = qk_cast_fp16)[name = string("op_6877_cast_fp16")]; bool var_6879_transpose_x_0 = const()[name = string("op_6879_transpose_x_0"), val = bool(false)]; bool var_6879_transpose_y_0 = const()[name = string("op_6879_transpose_y_0"), val = bool(false)]; tensor v_cast_fp16 = transpose(perm = var_6873, x = var_6872_cast_fp16)[name = string("transpose_388")]; tensor var_6879_cast_fp16 = matmul(transpose_x = var_6879_transpose_x_0, transpose_y = var_6879_transpose_y_0, x = var_6877_cast_fp16, y = v_cast_fp16)[name = string("op_6879_cast_fp16")]; tensor var_6880 = const()[name = string("op_6880"), val = tensor([0, 2, 1, 3])]; tensor concat_705x = const()[name = string("concat_705x"), val = tensor([1, -1, 1280])]; tensor var_6881_cast_fp16 = transpose(perm = var_6880, x = var_6879_cast_fp16)[name = string("transpose_385")]; tensor x_571_cast_fp16 = reshape(shape = concat_705x, x = var_6881_cast_fp16)[name = string("x_571_cast_fp16")]; tensor var_6885_to_fp16 = const()[name = string("op_6885_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1578016128)))]; tensor var_6886_to_fp16 = const()[name = string("op_6886_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1581292992)))]; tensor linear_253_cast_fp16 = linear(bias = var_6886_to_fp16, weight = var_6885_to_fp16, x = x_571_cast_fp16)[name = string("linear_253_cast_fp16")]; tensor x_573_cast_fp16 = add(x = x_567_cast_fp16, y = linear_253_cast_fp16)[name = string("x_573_cast_fp16")]; tensor var_6893_axes_0 = const()[name = string("op_6893_axes_0"), val = tensor([-1])]; tensor blocks_31_mlp_ln_weight_to_fp16 = const()[name = string("blocks_31_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1581295616)))]; tensor blocks_31_mlp_ln_bias_to_fp16 = const()[name = string("blocks_31_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1581298240)))]; tensor var_6893_cast_fp16 = layer_norm(axes = var_6893_axes_0, beta = blocks_31_mlp_ln_bias_to_fp16, epsilon = var_6727_to_fp16, gamma = blocks_31_mlp_ln_weight_to_fp16, x = x_573_cast_fp16)[name = string("op_6893_cast_fp16")]; tensor var_6902_to_fp16 = const()[name = string("op_6902_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1581300864)))]; tensor var_6903_to_fp16 = const()[name = string("op_6903_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1594408128)))]; tensor linear_254_cast_fp16 = linear(bias = var_6903_to_fp16, weight = var_6902_to_fp16, x = var_6893_cast_fp16)[name = string("linear_254_cast_fp16")]; string x_577_mode_0 = const()[name = string("x_577_mode_0"), val = string("EXACT")]; tensor x_577_cast_fp16 = gelu(mode = x_577_mode_0, x = linear_254_cast_fp16)[name = string("x_577_cast_fp16")]; tensor var_6908_to_fp16 = const()[name = string("op_6908_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1594418432)))]; tensor var_6909_to_fp16 = const()[name = string("op_6909_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1607525696)))]; tensor linear_255_cast_fp16 = linear(bias = var_6909_to_fp16, weight = var_6908_to_fp16, x = x_577_cast_fp16)[name = string("linear_255_cast_fp16")]; tensor x_579_cast_fp16 = add(x = x_573_cast_fp16, y = linear_255_cast_fp16)[name = string("x_579_cast_fp16")]; tensor var_6922_axes_0 = const()[name = string("op_6922_axes_0"), val = tensor([-1])]; tensor ln_weight_to_fp16 = const()[name = string("ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1607528320)))]; tensor ln_bias_to_fp16 = const()[name = string("ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1607530944)))]; fp16 var_6913_to_fp16 = const()[name = string("op_6913_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_6922_cast_fp16 = layer_norm(axes = var_6922_axes_0, beta = ln_bias_to_fp16, epsilon = var_6913_to_fp16, gamma = ln_weight_to_fp16, x = x_579_cast_fp16)[name = string("op_6922_cast_fp16")]; tensor var_6932_bias_0_to_fp16 = const()[name = string("op_6932_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1607533568)))]; tensor logits = linear(bias = var_6932_bias_0_to_fp16, weight = token_embedding_weight_to_fp16, x = var_6922_cast_fp16)[name = string("op_6932_cast_fp16")]; } -> (logits); }