program(1.3) [buildInfo = dict({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}, {"coremltools-component-torch", "2.4.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0"}})] { func main(state> k_cache1, state> k_cache2, tensor offset_mask, tensor token_data, state> v_cache1, state> v_cache2) [FlexibleShapeInformation = tuple>>, tuple, ?>>>>((("DefaultShapes", {{"offset_mask", [1, 1]}, {"token_data", [1, 1]}}), ("RangeDims", {{"offset_mask", [[1, 1], [1, 448]]}, {"token_data", [[1, 1], [1, 448]]}})))] { tensor var_38_shape_cast_fp16 = shape(x = offset_mask)[name = string("op_38_shape_cast_fp16")]; int32 gather_0_axis_0 = const()[name = string("gather_0_axis_0"), val = int32(0)]; int32 gather_0_batch_dims_0 = const()[name = string("gather_0_batch_dims_0"), val = int32(0)]; bool gather_0_validate_indices_0 = const()[name = string("gather_0_validate_indices_0"), val = bool(false)]; string var_38_shape_cast_fp16_to_int16_dtype_0 = const()[name = string("op_38_shape_cast_fp16_to_int16_dtype_0"), val = string("int16")]; uint16 select_0_to_uint16 = const()[name = string("select_0_to_uint16"), val = uint16(1)]; tensor var_38_shape_cast_fp16_to_int16 = cast(dtype = var_38_shape_cast_fp16_to_int16_dtype_0, x = var_38_shape_cast_fp16)[name = string("cast_154")]; int16 gather_0_cast_uint16 = gather(axis = gather_0_axis_0, batch_dims = gather_0_batch_dims_0, indices = select_0_to_uint16, validate_indices = gather_0_validate_indices_0, x = var_38_shape_cast_fp16_to_int16)[name = string("gather_0_cast_uint16")]; string gather_0_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_0_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor var_42_shape = shape(x = token_data)[name = string("op_42_shape")]; int32 gather_1_axis_0 = const()[name = string("gather_1_axis_0"), val = int32(0)]; int32 gather_1_batch_dims_0 = const()[name = string("gather_1_batch_dims_0"), val = int32(0)]; bool gather_1_validate_indices_0 = const()[name = string("gather_1_validate_indices_0"), val = bool(false)]; string var_42_shape_to_uint16_dtype_0 = const()[name = string("op_42_shape_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_1_to_uint16 = const()[name = string("select_1_to_uint16"), val = uint16(1)]; tensor var_42_shape_to_uint16 = cast(dtype = var_42_shape_to_uint16_dtype_0, x = var_42_shape)[name = string("cast_152")]; uint16 gather_1_cast_uint16 = gather(axis = gather_1_axis_0, batch_dims = gather_1_batch_dims_0, indices = select_1_to_uint16, validate_indices = gather_1_validate_indices_0, x = var_42_shape_to_uint16)[name = string("gather_1_cast_uint16")]; string gather_1_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_1_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_1_cast_uint16_to_int32 = cast(dtype = gather_1_cast_uint16_to_int32_dtype_0, x = gather_1_cast_uint16)[name = string("cast_151")]; int32 gather_0_cast_uint16_to_int32 = cast(dtype = gather_0_cast_uint16_to_int32_dtype_0, x = gather_0_cast_uint16)[name = string("cast_153")]; int32 offset = sub(x = gather_0_cast_uint16_to_int32, y = gather_1_cast_uint16_to_int32)[name = string("offset")]; int32 var_74_axis_0 = const()[name = string("op_74_axis_0"), val = int32(0)]; int32 var_74_batch_dims_0 = const()[name = string("op_74_batch_dims_0"), val = int32(0)]; bool var_74_validate_indices_0 = const()[name = string("op_74_validate_indices_0"), val = bool(false)]; tensor token_embedding_weight_to_fp16 = const()[name = string("token_embedding_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; tensor var_74_cast_fp16 = gather(axis = var_74_axis_0, batch_dims = var_74_batch_dims_0, indices = token_data, validate_indices = var_74_validate_indices_0, x = token_embedding_weight_to_fp16)[name = string("op_74_cast_fp16")]; int32 concat_0_values1_0 = const()[name = string("concat_0_values1_0"), val = int32(0)]; int32 concat_0_axis_0 = const()[name = string("concat_0_axis_0"), val = int32(0)]; bool concat_0_interleave_0 = const()[name = string("concat_0_interleave_0"), val = bool(false)]; tensor concat_0 = concat(axis = concat_0_axis_0, interleave = concat_0_interleave_0, values = (offset, concat_0_values1_0))[name = string("concat_0")]; int32 concat_1_values1_0 = const()[name = string("concat_1_values1_0"), val = int32(768)]; int32 concat_1_axis_0 = const()[name = string("concat_1_axis_0"), val = int32(0)]; bool concat_1_interleave_0 = const()[name = string("concat_1_interleave_0"), val = bool(false)]; tensor concat_1 = concat(axis = concat_1_axis_0, interleave = concat_1_interleave_0, values = (gather_0_cast_uint16_to_int32, concat_1_values1_0))[name = string("concat_1")]; tensor var_77_end_mask_0 = const()[name = string("op_77_end_mask_0"), val = tensor([false, true])]; tensor positional_embedding_to_fp16 = const()[name = string("positional_embedding_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79664768)))]; tensor var_77_cast_fp16 = slice_by_index(begin = concat_0, end = concat_1, end_mask = var_77_end_mask_0, x = positional_embedding_to_fp16)[name = string("op_77_cast_fp16")]; tensor x_3_cast_fp16 = add(x = var_74_cast_fp16, y = var_77_cast_fp16)[name = string("x_3_cast_fp16")]; tensor read_state_0 = read_state(input = k_cache1)[name = string("read_state_0")]; tensor k_cache_1_begin_0 = const()[name = string("k_cache_1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor k_cache_1_end_0 = const()[name = string("k_cache_1_end_0"), val = tensor([1, 1, 448, 768])]; tensor k_cache_1_end_mask_0 = const()[name = string("k_cache_1_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_1_squeeze_mask_0 = const()[name = string("k_cache_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_1_cast_fp16 = slice_by_index(begin = k_cache_1_begin_0, end = k_cache_1_end_0, end_mask = k_cache_1_end_mask_0, squeeze_mask = k_cache_1_squeeze_mask_0, x = read_state_0)[name = string("k_cache_1_cast_fp16")]; tensor read_state_1 = read_state(input = v_cache1)[name = string("read_state_1")]; tensor v_cache_1_begin_0 = const()[name = string("v_cache_1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor v_cache_1_end_0 = const()[name = string("v_cache_1_end_0"), val = tensor([1, 1, 448, 768])]; tensor v_cache_1_end_mask_0 = const()[name = string("v_cache_1_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_1_squeeze_mask_0 = const()[name = string("v_cache_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_1_cast_fp16 = slice_by_index(begin = v_cache_1_begin_0, end = v_cache_1_end_0, end_mask = v_cache_1_end_mask_0, squeeze_mask = v_cache_1_squeeze_mask_0, x = read_state_1)[name = string("v_cache_1_cast_fp16")]; tensor read_state_2 = read_state(input = k_cache2)[name = string("read_state_2")]; tensor k_cache_3_begin_0 = const()[name = string("k_cache_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor k_cache_3_end_0 = const()[name = string("k_cache_3_end_0"), val = tensor([1, 1, 1500, 768])]; tensor k_cache_3_end_mask_0 = const()[name = string("k_cache_3_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_3_squeeze_mask_0 = const()[name = string("k_cache_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_3_cast_fp16 = slice_by_index(begin = k_cache_3_begin_0, end = k_cache_3_end_0, end_mask = k_cache_3_end_mask_0, squeeze_mask = k_cache_3_squeeze_mask_0, x = read_state_2)[name = string("k_cache_3_cast_fp16")]; tensor read_state_3 = read_state(input = v_cache2)[name = string("read_state_3")]; tensor v_cache_3_begin_0 = const()[name = string("v_cache_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor v_cache_3_end_0 = const()[name = string("v_cache_3_end_0"), val = tensor([1, 1, 1500, 768])]; tensor v_cache_3_end_mask_0 = const()[name = string("v_cache_3_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_3_squeeze_mask_0 = const()[name = string("v_cache_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_3_cast_fp16 = slice_by_index(begin = v_cache_3_begin_0, end = v_cache_3_end_0, end_mask = v_cache_3_end_mask_0, squeeze_mask = v_cache_3_squeeze_mask_0, x = read_state_3)[name = string("v_cache_3_cast_fp16")]; int32 var_100 = const()[name = string("op_100"), val = int32(-1)]; tensor var_118_axes_0 = const()[name = string("op_118_axes_0"), val = tensor([-1])]; tensor blocks_0_attn_ln_weight_to_fp16 = const()[name = string("blocks_0_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80352960)))]; tensor blocks_0_attn_ln_bias_to_fp16 = const()[name = string("blocks_0_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80354560)))]; fp16 var_106_to_fp16 = const()[name = string("op_106_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_118_cast_fp16 = layer_norm(axes = var_118_axes_0, beta = blocks_0_attn_ln_bias_to_fp16, epsilon = var_106_to_fp16, gamma = blocks_0_attn_ln_weight_to_fp16, x = x_3_cast_fp16)[name = string("op_118_cast_fp16")]; tensor var_129_to_fp16 = const()[name = string("op_129_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80356160)))]; tensor var_130_to_fp16 = const()[name = string("op_130_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81535872)))]; tensor linear_0_cast_fp16 = linear(bias = var_130_to_fp16, weight = var_129_to_fp16, x = var_118_cast_fp16)[name = string("linear_0_cast_fp16")]; tensor var_133_to_fp16 = const()[name = string("op_133_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81537472)))]; tensor linear_1_bias_0_to_fp16 = const()[name = string("linear_1_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82717184)))]; tensor linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_133_to_fp16, x = var_118_cast_fp16)[name = string("linear_1_cast_fp16")]; tensor var_137_to_fp16 = const()[name = string("op_137_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82718784)))]; tensor var_138_to_fp16 = const()[name = string("op_138_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(83898496)))]; tensor linear_2_cast_fp16 = linear(bias = var_138_to_fp16, weight = var_137_to_fp16, x = var_118_cast_fp16)[name = string("linear_2_cast_fp16")]; tensor var_140_shape_cast_fp16 = shape(x = linear_0_cast_fp16)[name = string("op_140_shape_cast_fp16")]; int32 gather_2_axis_0 = const()[name = string("gather_2_axis_0"), val = int32(0)]; int32 gather_2_batch_dims_0 = const()[name = string("gather_2_batch_dims_0"), val = int32(0)]; bool gather_2_validate_indices_0 = const()[name = string("gather_2_validate_indices_0"), val = bool(false)]; string var_140_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_140_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_2_to_uint16 = const()[name = string("select_2_to_uint16"), val = uint16(1)]; tensor var_140_shape_cast_fp16_to_uint16 = cast(dtype = var_140_shape_cast_fp16_to_uint16_dtype_0, x = var_140_shape_cast_fp16)[name = string("cast_150")]; uint16 gather_2_cast_uint16 = gather(axis = gather_2_axis_0, batch_dims = gather_2_batch_dims_0, indices = select_2_to_uint16, validate_indices = gather_2_validate_indices_0, x = var_140_shape_cast_fp16_to_uint16)[name = string("gather_2_cast_uint16")]; string gather_2_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_2_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_2_cast_uint16_to_int32 = cast(dtype = gather_2_cast_uint16_to_int32_dtype_0, x = gather_2_cast_uint16)[name = string("cast_149")]; int32 end_step_3 = add(x = offset, y = gather_2_cast_uint16_to_int32)[name = string("end_step_3")]; tensor expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor([0])]; tensor expand_dims_1_axes_0 = const()[name = string("expand_dims_1_axes_0"), val = tensor([0])]; tensor expand_dims_1 = expand_dims(axes = expand_dims_1_axes_0, x = offset)[name = string("expand_dims_1")]; tensor expand_dims_2 = const()[name = string("expand_dims_2"), val = tensor([0])]; tensor expand_dims_3_axes_0 = const()[name = string("expand_dims_3_axes_0"), val = tensor([0])]; tensor expand_dims_3 = expand_dims(axes = expand_dims_3_axes_0, x = end_step_3)[name = string("expand_dims_3")]; tensor concat_4_values0_0 = const()[name = string("concat_4_values0_0"), val = tensor([0])]; int32 concat_4_axis_0 = const()[name = string("concat_4_axis_0"), val = int32(0)]; bool concat_4_interleave_0 = const()[name = string("concat_4_interleave_0"), val = bool(false)]; tensor concat_4 = concat(axis = concat_4_axis_0, interleave = concat_4_interleave_0, values = (concat_4_values0_0, expand_dims_0, expand_dims_1, expand_dims_2))[name = string("concat_4")]; tensor concat_5_values0_0 = const()[name = string("concat_5_values0_0"), val = tensor([0])]; tensor concat_5_values1_0 = const()[name = string("concat_5_values1_0"), val = tensor([0])]; tensor concat_5_values3_0 = const()[name = string("concat_5_values3_0"), val = tensor([0])]; int32 concat_5_axis_0 = const()[name = string("concat_5_axis_0"), val = int32(0)]; bool concat_5_interleave_0 = const()[name = string("concat_5_interleave_0"), val = bool(false)]; tensor concat_5 = concat(axis = concat_5_axis_0, interleave = concat_5_interleave_0, values = (concat_5_values0_0, concat_5_values1_0, expand_dims_3, concat_5_values3_0))[name = string("concat_5")]; tensor k_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_4, begin_mask = k_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_5, end_mask = k_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_1_stride_0, update = linear_1_cast_fp16, x = read_state_0)[name = string("k_cache1_internal_tensor_assign_1_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_1_cast_fp16, input = k_cache1)[name = string("coreml_update_state_24_write_state")]; tensor coreml_update_state_24 = read_state(input = k_cache1)[name = string("coreml_update_state_24")]; tensor v_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_4, begin_mask = v_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_5, end_mask = v_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_1_stride_0, update = linear_2_cast_fp16, x = read_state_1)[name = string("v_cache1_internal_tensor_assign_1_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_1_cast_fp16, input = v_cache1)[name = string("coreml_update_state_25_write_state")]; tensor coreml_update_state_25 = read_state(input = v_cache1)[name = string("coreml_update_state_25")]; int32 concat_10_values0_0 = const()[name = string("concat_10_values0_0"), val = int32(1)]; int32 concat_10_values2_0 = const()[name = string("concat_10_values2_0"), val = int32(768)]; int32 concat_10_axis_0 = const()[name = string("concat_10_axis_0"), val = int32(0)]; bool concat_10_interleave_0 = const()[name = string("concat_10_interleave_0"), val = bool(false)]; tensor concat_10 = concat(axis = concat_10_axis_0, interleave = concat_10_interleave_0, values = (concat_10_values0_0, end_step_3, concat_10_values2_0))[name = string("concat_10")]; tensor var_156_begin_0 = const()[name = string("op_156_begin_0"), val = tensor([0, 0, 0])]; tensor var_156_end_mask_0 = const()[name = string("op_156_end_mask_0"), val = tensor([true, false, true])]; tensor var_156_cast_fp16 = slice_by_index(begin = var_156_begin_0, end = concat_10, end_mask = var_156_end_mask_0, x = k_cache_1_cast_fp16)[name = string("op_156_cast_fp16")]; tensor var_159_begin_0 = const()[name = string("op_159_begin_0"), val = tensor([0, 0, 0])]; tensor var_159_end_mask_0 = const()[name = string("op_159_end_mask_0"), val = tensor([true, false, true])]; tensor var_159_cast_fp16 = slice_by_index(begin = var_159_begin_0, end = concat_10, end_mask = var_159_end_mask_0, x = v_cache_1_cast_fp16)[name = string("op_159_cast_fp16")]; tensor concat_12x = const()[name = string("concat_12x"), val = tensor([1, -1, 12, 64])]; tensor var_169_cast_fp16 = reshape(shape = concat_12x, x = linear_0_cast_fp16)[name = string("op_169_cast_fp16")]; tensor const_60_to_fp16 = const()[name = string("const_60_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_3_cast_fp16 = mul(x = var_169_cast_fp16, y = const_60_to_fp16)[name = string("q_3_cast_fp16")]; tensor concat_13x = const()[name = string("concat_13x"), val = tensor([1, -1, 12, 64])]; tensor var_176_cast_fp16 = reshape(shape = concat_13x, x = var_156_cast_fp16)[name = string("op_176_cast_fp16")]; tensor const_61_to_fp16 = const()[name = string("const_61_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_5_cast_fp16 = mul(x = var_176_cast_fp16, y = const_61_to_fp16)[name = string("k_5_cast_fp16")]; tensor concat_14x = const()[name = string("concat_14x"), val = tensor([1, -1, 12, 64])]; tensor var_183_cast_fp16 = reshape(shape = concat_14x, x = var_159_cast_fp16)[name = string("op_183_cast_fp16")]; tensor var_184 = const()[name = string("op_184"), val = tensor([0, 2, 1, 3])]; bool qk_1_transpose_x_0 = const()[name = string("qk_1_transpose_x_0"), val = bool(false)]; bool qk_1_transpose_y_0 = const()[name = string("qk_1_transpose_y_0"), val = bool(false)]; tensor transpose_97_perm_0 = const()[name = string("transpose_97_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_98_perm_0 = const()[name = string("transpose_98_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_98 = transpose(perm = transpose_98_perm_0, x = k_5_cast_fp16)[name = string("transpose_238")]; tensor transpose_97 = transpose(perm = transpose_97_perm_0, x = q_3_cast_fp16)[name = string("transpose_239")]; tensor qk_1_cast_fp16 = matmul(transpose_x = qk_1_transpose_x_0, transpose_y = qk_1_transpose_y_0, x = transpose_97, y = transpose_98)[name = string("qk_1_cast_fp16")]; int32 concat_15_values1_0 = const()[name = string("concat_15_values1_0"), val = int32(448)]; int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)]; bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)]; tensor concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (gather_2_cast_uint16_to_int32, concat_15_values1_0))[name = string("concat_15")]; tensor var_187_begin_0 = const()[name = string("op_187_begin_0"), val = tensor([0, 0])]; tensor var_187_end_mask_0 = const()[name = string("op_187_end_mask_0"), val = tensor([false, true])]; tensor mask_to_fp16 = const()[name = string("mask_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(83900096)))]; tensor var_187_cast_fp16 = slice_by_index(begin = var_187_begin_0, end = concat_15, end_mask = var_187_end_mask_0, x = mask_to_fp16)[name = string("op_187_cast_fp16")]; int32 concat_16_values0_0 = const()[name = string("concat_16_values0_0"), val = int32(0)]; int32 concat_16_axis_0 = const()[name = string("concat_16_axis_0"), val = int32(0)]; bool concat_16_interleave_0 = const()[name = string("concat_16_interleave_0"), val = bool(false)]; tensor concat_16 = concat(axis = concat_16_axis_0, interleave = concat_16_interleave_0, values = (concat_16_values0_0, gather_2_cast_uint16_to_int32))[name = string("concat_16")]; tensor var_188_begin_0 = const()[name = string("op_188_begin_0"), val = tensor([0, 0])]; tensor var_188_end_mask_0 = const()[name = string("op_188_end_mask_0"), val = tensor([true, false])]; tensor var_188_cast_fp16 = slice_by_index(begin = var_188_begin_0, end = concat_16, end_mask = var_188_end_mask_0, x = var_187_cast_fp16)[name = string("op_188_cast_fp16")]; tensor qk_3_cast_fp16 = add(x = qk_1_cast_fp16, y = var_188_cast_fp16)[name = string("qk_3_cast_fp16")]; tensor var_191_cast_fp16 = softmax(axis = var_100, x = qk_3_cast_fp16)[name = string("op_191_cast_fp16")]; bool var_193_transpose_x_0 = const()[name = string("op_193_transpose_x_0"), val = bool(false)]; bool var_193_transpose_y_0 = const()[name = string("op_193_transpose_y_0"), val = bool(false)]; tensor v_5_cast_fp16 = transpose(perm = var_184, x = var_183_cast_fp16)[name = string("transpose_240")]; tensor var_193_cast_fp16 = matmul(transpose_x = var_193_transpose_x_0, transpose_y = var_193_transpose_y_0, x = var_191_cast_fp16, y = v_5_cast_fp16)[name = string("op_193_cast_fp16")]; tensor var_194 = const()[name = string("op_194"), val = tensor([0, 2, 1, 3])]; tensor concat_17x = const()[name = string("concat_17x"), val = tensor([1, -1, 768])]; tensor var_195_cast_fp16 = transpose(perm = var_194, x = var_193_cast_fp16)[name = string("transpose_237")]; tensor x_7_cast_fp16 = reshape(shape = concat_17x, x = var_195_cast_fp16)[name = string("x_7_cast_fp16")]; tensor var_199_to_fp16 = const()[name = string("op_199_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84301568)))]; tensor var_200_to_fp16 = const()[name = string("op_200_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85481280)))]; tensor linear_3_cast_fp16 = linear(bias = var_200_to_fp16, weight = var_199_to_fp16, x = x_7_cast_fp16)[name = string("linear_3_cast_fp16")]; tensor x_9_cast_fp16 = add(x = x_3_cast_fp16, y = linear_3_cast_fp16)[name = string("x_9_cast_fp16")]; tensor var_207_axes_0 = const()[name = string("op_207_axes_0"), val = tensor([-1])]; tensor blocks_0_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_0_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85482880)))]; tensor blocks_0_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_0_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85484480)))]; tensor var_207_cast_fp16 = layer_norm(axes = var_207_axes_0, beta = blocks_0_cross_attn_ln_bias_to_fp16, epsilon = var_106_to_fp16, gamma = blocks_0_cross_attn_ln_weight_to_fp16, x = x_9_cast_fp16)[name = string("op_207_cast_fp16")]; tensor var_216_to_fp16 = const()[name = string("op_216_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85486080)))]; tensor var_217_to_fp16 = const()[name = string("op_217_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86665792)))]; tensor linear_4_cast_fp16 = linear(bias = var_217_to_fp16, weight = var_216_to_fp16, x = var_207_cast_fp16)[name = string("linear_4_cast_fp16")]; tensor concat_18 = const()[name = string("concat_18"), val = tensor([0, 0, 0])]; tensor concat_19 = const()[name = string("concat_19"), val = tensor([0, 1500, 0])]; tensor k_7_internal_tensor_assign_1_stride_0 = const()[name = string("k_7_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_7_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_7_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_7_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_7_to_fp16 = const()[name = string("k_7_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86667392)))]; tensor k_7_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_18, begin_mask = k_7_internal_tensor_assign_1_begin_mask_0, end = concat_19, end_mask = k_7_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_7_internal_tensor_assign_1_squeeze_mask_0, stride = k_7_internal_tensor_assign_1_stride_0, update = k_cache_3_cast_fp16, x = k_7_to_fp16)[name = string("k_7_internal_tensor_assign_1_cast_fp16")]; tensor concat_20 = const()[name = string("concat_20"), val = tensor([0, 0, 0])]; tensor concat_21 = const()[name = string("concat_21"), val = tensor([0, 1500, 0])]; tensor v_7_internal_tensor_assign_1_stride_0 = const()[name = string("v_7_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_7_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_7_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_7_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_7_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_20, begin_mask = v_7_internal_tensor_assign_1_begin_mask_0, end = concat_21, end_mask = v_7_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_7_internal_tensor_assign_1_squeeze_mask_0, stride = v_7_internal_tensor_assign_1_stride_0, update = v_cache_3_cast_fp16, x = k_7_to_fp16)[name = string("v_7_internal_tensor_assign_1_cast_fp16")]; tensor concat_22x = const()[name = string("concat_22x"), val = tensor([1, -1, 12, 64])]; tensor var_237_cast_fp16 = reshape(shape = concat_22x, x = linear_4_cast_fp16)[name = string("op_237_cast_fp16")]; tensor const_62_to_fp16 = const()[name = string("const_62_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_7_cast_fp16 = mul(x = var_237_cast_fp16, y = const_62_to_fp16)[name = string("q_7_cast_fp16")]; tensor var_243 = const()[name = string("op_243"), val = tensor([1, 1500, 12, -1])]; tensor var_244_cast_fp16 = reshape(shape = var_243, x = k_7_internal_tensor_assign_1_cast_fp16)[name = string("op_244_cast_fp16")]; tensor const_63_to_fp16 = const()[name = string("const_63_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_9_cast_fp16 = mul(x = var_244_cast_fp16, y = const_63_to_fp16)[name = string("k_9_cast_fp16")]; tensor var_250 = const()[name = string("op_250"), val = tensor([1, 1500, 12, -1])]; tensor var_251_cast_fp16 = reshape(shape = var_250, x = v_7_internal_tensor_assign_1_cast_fp16)[name = string("op_251_cast_fp16")]; tensor var_252 = const()[name = string("op_252"), val = tensor([0, 2, 1, 3])]; bool qk_5_transpose_x_0 = const()[name = string("qk_5_transpose_x_0"), val = bool(false)]; bool qk_5_transpose_y_0 = const()[name = string("qk_5_transpose_y_0"), val = bool(false)]; tensor transpose_99_perm_0 = const()[name = string("transpose_99_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_100_perm_0 = const()[name = string("transpose_100_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_100 = transpose(perm = transpose_100_perm_0, x = k_9_cast_fp16)[name = string("transpose_234")]; tensor transpose_99 = transpose(perm = transpose_99_perm_0, x = q_7_cast_fp16)[name = string("transpose_235")]; tensor qk_5_cast_fp16 = matmul(transpose_x = qk_5_transpose_x_0, transpose_y = qk_5_transpose_y_0, x = transpose_99, y = transpose_100)[name = string("qk_5_cast_fp16")]; tensor var_256_cast_fp16 = softmax(axis = var_100, x = qk_5_cast_fp16)[name = string("op_256_cast_fp16")]; bool var_258_transpose_x_0 = const()[name = string("op_258_transpose_x_0"), val = bool(false)]; bool var_258_transpose_y_0 = const()[name = string("op_258_transpose_y_0"), val = bool(false)]; tensor v_9_cast_fp16 = transpose(perm = var_252, x = var_251_cast_fp16)[name = string("transpose_236")]; tensor var_258_cast_fp16 = matmul(transpose_x = var_258_transpose_x_0, transpose_y = var_258_transpose_y_0, x = var_256_cast_fp16, y = v_9_cast_fp16)[name = string("op_258_cast_fp16")]; tensor var_259 = const()[name = string("op_259"), val = tensor([0, 2, 1, 3])]; tensor concat_23x = const()[name = string("concat_23x"), val = tensor([1, -1, 768])]; tensor var_260_cast_fp16 = transpose(perm = var_259, x = var_258_cast_fp16)[name = string("transpose_233")]; tensor x_13_cast_fp16 = reshape(shape = concat_23x, x = var_260_cast_fp16)[name = string("x_13_cast_fp16")]; tensor var_264_to_fp16 = const()[name = string("op_264_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88971456)))]; tensor var_265_to_fp16 = const()[name = string("op_265_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90151168)))]; tensor linear_5_cast_fp16 = linear(bias = var_265_to_fp16, weight = var_264_to_fp16, x = x_13_cast_fp16)[name = string("linear_5_cast_fp16")]; tensor x_15_cast_fp16 = add(x = x_9_cast_fp16, y = linear_5_cast_fp16)[name = string("x_15_cast_fp16")]; tensor var_272_axes_0 = const()[name = string("op_272_axes_0"), val = tensor([-1])]; tensor blocks_0_mlp_ln_weight_to_fp16 = const()[name = string("blocks_0_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90152768)))]; tensor blocks_0_mlp_ln_bias_to_fp16 = const()[name = string("blocks_0_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90154368)))]; tensor var_272_cast_fp16 = layer_norm(axes = var_272_axes_0, beta = blocks_0_mlp_ln_bias_to_fp16, epsilon = var_106_to_fp16, gamma = blocks_0_mlp_ln_weight_to_fp16, x = x_15_cast_fp16)[name = string("op_272_cast_fp16")]; tensor var_281_to_fp16 = const()[name = string("op_281_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90155968)))]; tensor var_282_to_fp16 = const()[name = string("op_282_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94874624)))]; tensor linear_6_cast_fp16 = linear(bias = var_282_to_fp16, weight = var_281_to_fp16, x = var_272_cast_fp16)[name = string("linear_6_cast_fp16")]; string x_19_mode_0 = const()[name = string("x_19_mode_0"), val = string("EXACT")]; tensor x_19_cast_fp16 = gelu(mode = x_19_mode_0, x = linear_6_cast_fp16)[name = string("x_19_cast_fp16")]; tensor var_287_to_fp16 = const()[name = string("op_287_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94880832)))]; tensor var_288_to_fp16 = const()[name = string("op_288_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99599488)))]; tensor linear_7_cast_fp16 = linear(bias = var_288_to_fp16, weight = var_287_to_fp16, x = x_19_cast_fp16)[name = string("linear_7_cast_fp16")]; tensor x_21_cast_fp16 = add(x = x_15_cast_fp16, y = linear_7_cast_fp16)[name = string("x_21_cast_fp16")]; tensor k_cache_5_begin_0 = const()[name = string("k_cache_5_begin_0"), val = tensor([1, 0, 0, 0])]; tensor k_cache_5_end_0 = const()[name = string("k_cache_5_end_0"), val = tensor([2, 1, 448, 768])]; tensor k_cache_5_end_mask_0 = const()[name = string("k_cache_5_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_5_squeeze_mask_0 = const()[name = string("k_cache_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_5_cast_fp16 = slice_by_index(begin = k_cache_5_begin_0, end = k_cache_5_end_0, end_mask = k_cache_5_end_mask_0, squeeze_mask = k_cache_5_squeeze_mask_0, x = coreml_update_state_24)[name = string("k_cache_5_cast_fp16")]; tensor v_cache_5_begin_0 = const()[name = string("v_cache_5_begin_0"), val = tensor([1, 0, 0, 0])]; tensor v_cache_5_end_0 = const()[name = string("v_cache_5_end_0"), val = tensor([2, 1, 448, 768])]; tensor v_cache_5_end_mask_0 = const()[name = string("v_cache_5_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_5_squeeze_mask_0 = const()[name = string("v_cache_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_5_cast_fp16 = slice_by_index(begin = v_cache_5_begin_0, end = v_cache_5_end_0, end_mask = v_cache_5_end_mask_0, squeeze_mask = v_cache_5_squeeze_mask_0, x = coreml_update_state_25)[name = string("v_cache_5_cast_fp16")]; tensor k_cache_7_begin_0 = const()[name = string("k_cache_7_begin_0"), val = tensor([1, 0, 0, 0])]; tensor k_cache_7_end_0 = const()[name = string("k_cache_7_end_0"), val = tensor([2, 1, 1500, 768])]; tensor k_cache_7_end_mask_0 = const()[name = string("k_cache_7_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_7_squeeze_mask_0 = const()[name = string("k_cache_7_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_7_cast_fp16 = slice_by_index(begin = k_cache_7_begin_0, end = k_cache_7_end_0, end_mask = k_cache_7_end_mask_0, squeeze_mask = k_cache_7_squeeze_mask_0, x = read_state_2)[name = string("k_cache_7_cast_fp16")]; tensor v_cache_7_begin_0 = const()[name = string("v_cache_7_begin_0"), val = tensor([1, 0, 0, 0])]; tensor v_cache_7_end_0 = const()[name = string("v_cache_7_end_0"), val = tensor([2, 1, 1500, 768])]; tensor v_cache_7_end_mask_0 = const()[name = string("v_cache_7_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_7_squeeze_mask_0 = const()[name = string("v_cache_7_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_7_cast_fp16 = slice_by_index(begin = v_cache_7_begin_0, end = v_cache_7_end_0, end_mask = v_cache_7_end_mask_0, squeeze_mask = v_cache_7_squeeze_mask_0, x = read_state_3)[name = string("v_cache_7_cast_fp16")]; int32 var_311 = const()[name = string("op_311"), val = int32(-1)]; tensor var_329_axes_0 = const()[name = string("op_329_axes_0"), val = tensor([-1])]; tensor blocks_1_attn_ln_weight_to_fp16 = const()[name = string("blocks_1_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99601088)))]; tensor blocks_1_attn_ln_bias_to_fp16 = const()[name = string("blocks_1_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99602688)))]; fp16 var_317_to_fp16 = const()[name = string("op_317_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_329_cast_fp16 = layer_norm(axes = var_329_axes_0, beta = blocks_1_attn_ln_bias_to_fp16, epsilon = var_317_to_fp16, gamma = blocks_1_attn_ln_weight_to_fp16, x = x_21_cast_fp16)[name = string("op_329_cast_fp16")]; tensor var_340_to_fp16 = const()[name = string("op_340_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99604288)))]; tensor var_341_to_fp16 = const()[name = string("op_341_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100784000)))]; tensor linear_8_cast_fp16 = linear(bias = var_341_to_fp16, weight = var_340_to_fp16, x = var_329_cast_fp16)[name = string("linear_8_cast_fp16")]; tensor var_344_to_fp16 = const()[name = string("op_344_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100785600)))]; tensor linear_9_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_344_to_fp16, x = var_329_cast_fp16)[name = string("linear_9_cast_fp16")]; tensor var_348_to_fp16 = const()[name = string("op_348_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101965312)))]; tensor var_349_to_fp16 = const()[name = string("op_349_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103145024)))]; tensor linear_10_cast_fp16 = linear(bias = var_349_to_fp16, weight = var_348_to_fp16, x = var_329_cast_fp16)[name = string("linear_10_cast_fp16")]; tensor var_351_shape_cast_fp16 = shape(x = linear_8_cast_fp16)[name = string("op_351_shape_cast_fp16")]; int32 gather_14_axis_0 = const()[name = string("gather_14_axis_0"), val = int32(0)]; int32 gather_14_batch_dims_0 = const()[name = string("gather_14_batch_dims_0"), val = int32(0)]; bool gather_14_validate_indices_0 = const()[name = string("gather_14_validate_indices_0"), val = bool(false)]; string var_351_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_351_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_14_to_uint16 = const()[name = string("select_14_to_uint16"), val = uint16(1)]; tensor var_351_shape_cast_fp16_to_uint16 = cast(dtype = var_351_shape_cast_fp16_to_uint16_dtype_0, x = var_351_shape_cast_fp16)[name = string("cast_148")]; uint16 gather_14_cast_uint16 = gather(axis = gather_14_axis_0, batch_dims = gather_14_batch_dims_0, indices = select_14_to_uint16, validate_indices = gather_14_validate_indices_0, x = var_351_shape_cast_fp16_to_uint16)[name = string("gather_14_cast_uint16")]; string gather_14_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_14_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_14_cast_uint16_to_int32 = cast(dtype = gather_14_cast_uint16_to_int32_dtype_0, x = gather_14_cast_uint16)[name = string("cast_147")]; int32 end_step_5 = add(x = offset, y = gather_14_cast_uint16_to_int32)[name = string("end_step_5")]; tensor expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor([0])]; tensor expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor([0])]; tensor expand_dims_19_axes_0 = const()[name = string("expand_dims_19_axes_0"), val = tensor([0])]; tensor expand_dims_19 = expand_dims(axes = expand_dims_19_axes_0, x = end_step_5)[name = string("expand_dims_19")]; tensor concat_26_values0_0 = const()[name = string("concat_26_values0_0"), val = tensor([1])]; int32 concat_26_axis_0 = const()[name = string("concat_26_axis_0"), val = int32(0)]; bool concat_26_interleave_0 = const()[name = string("concat_26_interleave_0"), val = bool(false)]; tensor concat_26 = concat(axis = concat_26_axis_0, interleave = concat_26_interleave_0, values = (concat_26_values0_0, expand_dims_16, expand_dims_1, expand_dims_18))[name = string("concat_26")]; tensor concat_27_values0_0 = const()[name = string("concat_27_values0_0"), val = tensor([0])]; tensor concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor([0])]; tensor concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor([0])]; int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)]; bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)]; tensor concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (concat_27_values0_0, concat_27_values1_0, expand_dims_19, concat_27_values3_0))[name = string("concat_27")]; tensor k_cache1_internal_tensor_assign_2_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_2_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_2_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_26, begin_mask = k_cache1_internal_tensor_assign_2_begin_mask_0, end = concat_27, end_mask = k_cache1_internal_tensor_assign_2_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_2_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_2_stride_0, update = linear_9_cast_fp16, x = coreml_update_state_24)[name = string("k_cache1_internal_tensor_assign_2_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_2_cast_fp16, input = k_cache1)[name = string("coreml_update_state_26_write_state")]; tensor coreml_update_state_26 = read_state(input = k_cache1)[name = string("coreml_update_state_26")]; tensor v_cache1_internal_tensor_assign_2_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_2_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_2_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_26, begin_mask = v_cache1_internal_tensor_assign_2_begin_mask_0, end = concat_27, end_mask = v_cache1_internal_tensor_assign_2_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_2_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_2_stride_0, update = linear_10_cast_fp16, x = coreml_update_state_25)[name = string("v_cache1_internal_tensor_assign_2_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_2_cast_fp16, input = v_cache1)[name = string("coreml_update_state_27_write_state")]; tensor coreml_update_state_27 = read_state(input = v_cache1)[name = string("coreml_update_state_27")]; int32 concat_32_values0_0 = const()[name = string("concat_32_values0_0"), val = int32(1)]; int32 concat_32_values2_0 = const()[name = string("concat_32_values2_0"), val = int32(768)]; int32 concat_32_axis_0 = const()[name = string("concat_32_axis_0"), val = int32(0)]; bool concat_32_interleave_0 = const()[name = string("concat_32_interleave_0"), val = bool(false)]; tensor concat_32 = concat(axis = concat_32_axis_0, interleave = concat_32_interleave_0, values = (concat_32_values0_0, end_step_5, concat_32_values2_0))[name = string("concat_32")]; tensor var_367_begin_0 = const()[name = string("op_367_begin_0"), val = tensor([0, 0, 0])]; tensor var_367_end_mask_0 = const()[name = string("op_367_end_mask_0"), val = tensor([true, false, true])]; tensor var_367_cast_fp16 = slice_by_index(begin = var_367_begin_0, end = concat_32, end_mask = var_367_end_mask_0, x = k_cache_5_cast_fp16)[name = string("op_367_cast_fp16")]; tensor var_370_begin_0 = const()[name = string("op_370_begin_0"), val = tensor([0, 0, 0])]; tensor var_370_end_mask_0 = const()[name = string("op_370_end_mask_0"), val = tensor([true, false, true])]; tensor var_370_cast_fp16 = slice_by_index(begin = var_370_begin_0, end = concat_32, end_mask = var_370_end_mask_0, x = v_cache_5_cast_fp16)[name = string("op_370_cast_fp16")]; tensor concat_34x = const()[name = string("concat_34x"), val = tensor([1, -1, 12, 64])]; tensor var_380_cast_fp16 = reshape(shape = concat_34x, x = linear_8_cast_fp16)[name = string("op_380_cast_fp16")]; tensor const_64_to_fp16 = const()[name = string("const_64_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_11_cast_fp16 = mul(x = var_380_cast_fp16, y = const_64_to_fp16)[name = string("q_11_cast_fp16")]; tensor concat_35x = const()[name = string("concat_35x"), val = tensor([1, -1, 12, 64])]; tensor var_387_cast_fp16 = reshape(shape = concat_35x, x = var_367_cast_fp16)[name = string("op_387_cast_fp16")]; tensor const_65_to_fp16 = const()[name = string("const_65_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_15_cast_fp16 = mul(x = var_387_cast_fp16, y = const_65_to_fp16)[name = string("k_15_cast_fp16")]; tensor concat_36x = const()[name = string("concat_36x"), val = tensor([1, -1, 12, 64])]; tensor var_394_cast_fp16 = reshape(shape = concat_36x, x = var_370_cast_fp16)[name = string("op_394_cast_fp16")]; tensor var_395 = const()[name = string("op_395"), val = tensor([0, 2, 1, 3])]; bool qk_7_transpose_x_0 = const()[name = string("qk_7_transpose_x_0"), val = bool(false)]; bool qk_7_transpose_y_0 = const()[name = string("qk_7_transpose_y_0"), val = bool(false)]; tensor transpose_101_perm_0 = const()[name = string("transpose_101_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_102_perm_0 = const()[name = string("transpose_102_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_102 = transpose(perm = transpose_102_perm_0, x = k_15_cast_fp16)[name = string("transpose_230")]; tensor transpose_101 = transpose(perm = transpose_101_perm_0, x = q_11_cast_fp16)[name = string("transpose_231")]; tensor qk_7_cast_fp16 = matmul(transpose_x = qk_7_transpose_x_0, transpose_y = qk_7_transpose_y_0, x = transpose_101, y = transpose_102)[name = string("qk_7_cast_fp16")]; int32 concat_37_values1_0 = const()[name = string("concat_37_values1_0"), val = int32(448)]; int32 concat_37_axis_0 = const()[name = string("concat_37_axis_0"), val = int32(0)]; bool concat_37_interleave_0 = const()[name = string("concat_37_interleave_0"), val = bool(false)]; tensor concat_37 = concat(axis = concat_37_axis_0, interleave = concat_37_interleave_0, values = (gather_14_cast_uint16_to_int32, concat_37_values1_0))[name = string("concat_37")]; tensor var_398_begin_0 = const()[name = string("op_398_begin_0"), val = tensor([0, 0])]; tensor var_398_end_mask_0 = const()[name = string("op_398_end_mask_0"), val = tensor([false, true])]; tensor var_398_cast_fp16 = slice_by_index(begin = var_398_begin_0, end = concat_37, end_mask = var_398_end_mask_0, x = mask_to_fp16)[name = string("op_398_cast_fp16")]; int32 concat_38_values0_0 = const()[name = string("concat_38_values0_0"), val = int32(0)]; int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)]; bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)]; tensor concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (concat_38_values0_0, gather_14_cast_uint16_to_int32))[name = string("concat_38")]; tensor var_399_begin_0 = const()[name = string("op_399_begin_0"), val = tensor([0, 0])]; tensor var_399_end_mask_0 = const()[name = string("op_399_end_mask_0"), val = tensor([true, false])]; tensor var_399_cast_fp16 = slice_by_index(begin = var_399_begin_0, end = concat_38, end_mask = var_399_end_mask_0, x = var_398_cast_fp16)[name = string("op_399_cast_fp16")]; tensor qk_9_cast_fp16 = add(x = qk_7_cast_fp16, y = var_399_cast_fp16)[name = string("qk_9_cast_fp16")]; tensor var_402_cast_fp16 = softmax(axis = var_311, x = qk_9_cast_fp16)[name = string("op_402_cast_fp16")]; bool var_404_transpose_x_0 = const()[name = string("op_404_transpose_x_0"), val = bool(false)]; bool var_404_transpose_y_0 = const()[name = string("op_404_transpose_y_0"), val = bool(false)]; tensor v_15_cast_fp16 = transpose(perm = var_395, x = var_394_cast_fp16)[name = string("transpose_232")]; tensor var_404_cast_fp16 = matmul(transpose_x = var_404_transpose_x_0, transpose_y = var_404_transpose_y_0, x = var_402_cast_fp16, y = v_15_cast_fp16)[name = string("op_404_cast_fp16")]; tensor var_405 = const()[name = string("op_405"), val = tensor([0, 2, 1, 3])]; tensor concat_39x = const()[name = string("concat_39x"), val = tensor([1, -1, 768])]; tensor var_406_cast_fp16 = transpose(perm = var_405, x = var_404_cast_fp16)[name = string("transpose_229")]; tensor x_25_cast_fp16 = reshape(shape = concat_39x, x = var_406_cast_fp16)[name = string("x_25_cast_fp16")]; tensor var_410_to_fp16 = const()[name = string("op_410_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103146624)))]; tensor var_411_to_fp16 = const()[name = string("op_411_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104326336)))]; tensor linear_11_cast_fp16 = linear(bias = var_411_to_fp16, weight = var_410_to_fp16, x = x_25_cast_fp16)[name = string("linear_11_cast_fp16")]; tensor x_27_cast_fp16 = add(x = x_21_cast_fp16, y = linear_11_cast_fp16)[name = string("x_27_cast_fp16")]; tensor var_418_axes_0 = const()[name = string("op_418_axes_0"), val = tensor([-1])]; tensor blocks_1_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_1_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104327936)))]; tensor blocks_1_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_1_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104329536)))]; tensor var_418_cast_fp16 = layer_norm(axes = var_418_axes_0, beta = blocks_1_cross_attn_ln_bias_to_fp16, epsilon = var_317_to_fp16, gamma = blocks_1_cross_attn_ln_weight_to_fp16, x = x_27_cast_fp16)[name = string("op_418_cast_fp16")]; tensor var_427_to_fp16 = const()[name = string("op_427_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104331136)))]; tensor var_428_to_fp16 = const()[name = string("op_428_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105510848)))]; tensor linear_12_cast_fp16 = linear(bias = var_428_to_fp16, weight = var_427_to_fp16, x = var_418_cast_fp16)[name = string("linear_12_cast_fp16")]; tensor concat_40 = const()[name = string("concat_40"), val = tensor([0, 0, 0])]; tensor concat_41 = const()[name = string("concat_41"), val = tensor([0, 1500, 0])]; tensor k_17_internal_tensor_assign_1_stride_0 = const()[name = string("k_17_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_17_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_17_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_17_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_17_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_40, begin_mask = k_17_internal_tensor_assign_1_begin_mask_0, end = concat_41, end_mask = k_17_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_17_internal_tensor_assign_1_squeeze_mask_0, stride = k_17_internal_tensor_assign_1_stride_0, update = k_cache_7_cast_fp16, x = k_7_to_fp16)[name = string("k_17_internal_tensor_assign_1_cast_fp16")]; tensor concat_42 = const()[name = string("concat_42"), val = tensor([0, 0, 0])]; tensor concat_43 = const()[name = string("concat_43"), val = tensor([0, 1500, 0])]; tensor v_17_internal_tensor_assign_1_stride_0 = const()[name = string("v_17_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_17_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_17_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_17_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_17_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_42, begin_mask = v_17_internal_tensor_assign_1_begin_mask_0, end = concat_43, end_mask = v_17_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_17_internal_tensor_assign_1_squeeze_mask_0, stride = v_17_internal_tensor_assign_1_stride_0, update = v_cache_7_cast_fp16, x = k_7_to_fp16)[name = string("v_17_internal_tensor_assign_1_cast_fp16")]; tensor concat_44x = const()[name = string("concat_44x"), val = tensor([1, -1, 12, 64])]; tensor var_448_cast_fp16 = reshape(shape = concat_44x, x = linear_12_cast_fp16)[name = string("op_448_cast_fp16")]; tensor const_66_to_fp16 = const()[name = string("const_66_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_15_cast_fp16 = mul(x = var_448_cast_fp16, y = const_66_to_fp16)[name = string("q_15_cast_fp16")]; tensor var_454 = const()[name = string("op_454"), val = tensor([1, 1500, 12, -1])]; tensor var_455_cast_fp16 = reshape(shape = var_454, x = k_17_internal_tensor_assign_1_cast_fp16)[name = string("op_455_cast_fp16")]; tensor const_67_to_fp16 = const()[name = string("const_67_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_19_cast_fp16 = mul(x = var_455_cast_fp16, y = const_67_to_fp16)[name = string("k_19_cast_fp16")]; tensor var_461 = const()[name = string("op_461"), val = tensor([1, 1500, 12, -1])]; tensor var_462_cast_fp16 = reshape(shape = var_461, x = v_17_internal_tensor_assign_1_cast_fp16)[name = string("op_462_cast_fp16")]; tensor var_463 = const()[name = string("op_463"), val = tensor([0, 2, 1, 3])]; bool qk_11_transpose_x_0 = const()[name = string("qk_11_transpose_x_0"), val = bool(false)]; bool qk_11_transpose_y_0 = const()[name = string("qk_11_transpose_y_0"), val = bool(false)]; tensor transpose_103_perm_0 = const()[name = string("transpose_103_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_104_perm_0 = const()[name = string("transpose_104_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_104 = transpose(perm = transpose_104_perm_0, x = k_19_cast_fp16)[name = string("transpose_226")]; tensor transpose_103 = transpose(perm = transpose_103_perm_0, x = q_15_cast_fp16)[name = string("transpose_227")]; tensor qk_11_cast_fp16 = matmul(transpose_x = qk_11_transpose_x_0, transpose_y = qk_11_transpose_y_0, x = transpose_103, y = transpose_104)[name = string("qk_11_cast_fp16")]; tensor var_467_cast_fp16 = softmax(axis = var_311, x = qk_11_cast_fp16)[name = string("op_467_cast_fp16")]; bool var_469_transpose_x_0 = const()[name = string("op_469_transpose_x_0"), val = bool(false)]; bool var_469_transpose_y_0 = const()[name = string("op_469_transpose_y_0"), val = bool(false)]; tensor v_19_cast_fp16 = transpose(perm = var_463, x = var_462_cast_fp16)[name = string("transpose_228")]; tensor var_469_cast_fp16 = matmul(transpose_x = var_469_transpose_x_0, transpose_y = var_469_transpose_y_0, x = var_467_cast_fp16, y = v_19_cast_fp16)[name = string("op_469_cast_fp16")]; tensor var_470 = const()[name = string("op_470"), val = tensor([0, 2, 1, 3])]; tensor concat_45x = const()[name = string("concat_45x"), val = tensor([1, -1, 768])]; tensor var_471_cast_fp16 = transpose(perm = var_470, x = var_469_cast_fp16)[name = string("transpose_225")]; tensor x_31_cast_fp16 = reshape(shape = concat_45x, x = var_471_cast_fp16)[name = string("x_31_cast_fp16")]; tensor var_475_to_fp16 = const()[name = string("op_475_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105512448)))]; tensor var_476_to_fp16 = const()[name = string("op_476_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(106692160)))]; tensor linear_13_cast_fp16 = linear(bias = var_476_to_fp16, weight = var_475_to_fp16, x = x_31_cast_fp16)[name = string("linear_13_cast_fp16")]; tensor x_33_cast_fp16 = add(x = x_27_cast_fp16, y = linear_13_cast_fp16)[name = string("x_33_cast_fp16")]; tensor var_483_axes_0 = const()[name = string("op_483_axes_0"), val = tensor([-1])]; tensor blocks_1_mlp_ln_weight_to_fp16 = const()[name = string("blocks_1_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(106693760)))]; tensor blocks_1_mlp_ln_bias_to_fp16 = const()[name = string("blocks_1_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(106695360)))]; tensor var_483_cast_fp16 = layer_norm(axes = var_483_axes_0, beta = blocks_1_mlp_ln_bias_to_fp16, epsilon = var_317_to_fp16, gamma = blocks_1_mlp_ln_weight_to_fp16, x = x_33_cast_fp16)[name = string("op_483_cast_fp16")]; tensor var_492_to_fp16 = const()[name = string("op_492_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(106696960)))]; tensor var_493_to_fp16 = const()[name = string("op_493_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111415616)))]; tensor linear_14_cast_fp16 = linear(bias = var_493_to_fp16, weight = var_492_to_fp16, x = var_483_cast_fp16)[name = string("linear_14_cast_fp16")]; string x_37_mode_0 = const()[name = string("x_37_mode_0"), val = string("EXACT")]; tensor x_37_cast_fp16 = gelu(mode = x_37_mode_0, x = linear_14_cast_fp16)[name = string("x_37_cast_fp16")]; tensor var_498_to_fp16 = const()[name = string("op_498_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111421824)))]; tensor var_499_to_fp16 = const()[name = string("op_499_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116140480)))]; tensor linear_15_cast_fp16 = linear(bias = var_499_to_fp16, weight = var_498_to_fp16, x = x_37_cast_fp16)[name = string("linear_15_cast_fp16")]; tensor x_39_cast_fp16 = add(x = x_33_cast_fp16, y = linear_15_cast_fp16)[name = string("x_39_cast_fp16")]; tensor k_cache_9_begin_0 = const()[name = string("k_cache_9_begin_0"), val = tensor([2, 0, 0, 0])]; tensor k_cache_9_end_0 = const()[name = string("k_cache_9_end_0"), val = tensor([3, 1, 448, 768])]; tensor k_cache_9_end_mask_0 = const()[name = string("k_cache_9_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_9_squeeze_mask_0 = const()[name = string("k_cache_9_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_9_cast_fp16 = slice_by_index(begin = k_cache_9_begin_0, end = k_cache_9_end_0, end_mask = k_cache_9_end_mask_0, squeeze_mask = k_cache_9_squeeze_mask_0, x = coreml_update_state_26)[name = string("k_cache_9_cast_fp16")]; tensor v_cache_9_begin_0 = const()[name = string("v_cache_9_begin_0"), val = tensor([2, 0, 0, 0])]; tensor v_cache_9_end_0 = const()[name = string("v_cache_9_end_0"), val = tensor([3, 1, 448, 768])]; tensor v_cache_9_end_mask_0 = const()[name = string("v_cache_9_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_9_squeeze_mask_0 = const()[name = string("v_cache_9_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_9_cast_fp16 = slice_by_index(begin = v_cache_9_begin_0, end = v_cache_9_end_0, end_mask = v_cache_9_end_mask_0, squeeze_mask = v_cache_9_squeeze_mask_0, x = coreml_update_state_27)[name = string("v_cache_9_cast_fp16")]; tensor k_cache_11_begin_0 = const()[name = string("k_cache_11_begin_0"), val = tensor([2, 0, 0, 0])]; tensor k_cache_11_end_0 = const()[name = string("k_cache_11_end_0"), val = tensor([3, 1, 1500, 768])]; tensor k_cache_11_end_mask_0 = const()[name = string("k_cache_11_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_11_squeeze_mask_0 = const()[name = string("k_cache_11_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_11_cast_fp16 = slice_by_index(begin = k_cache_11_begin_0, end = k_cache_11_end_0, end_mask = k_cache_11_end_mask_0, squeeze_mask = k_cache_11_squeeze_mask_0, x = read_state_2)[name = string("k_cache_11_cast_fp16")]; tensor v_cache_11_begin_0 = const()[name = string("v_cache_11_begin_0"), val = tensor([2, 0, 0, 0])]; tensor v_cache_11_end_0 = const()[name = string("v_cache_11_end_0"), val = tensor([3, 1, 1500, 768])]; tensor v_cache_11_end_mask_0 = const()[name = string("v_cache_11_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_11_squeeze_mask_0 = const()[name = string("v_cache_11_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_11_cast_fp16 = slice_by_index(begin = v_cache_11_begin_0, end = v_cache_11_end_0, end_mask = v_cache_11_end_mask_0, squeeze_mask = v_cache_11_squeeze_mask_0, x = read_state_3)[name = string("v_cache_11_cast_fp16")]; int32 var_522 = const()[name = string("op_522"), val = int32(-1)]; tensor var_540_axes_0 = const()[name = string("op_540_axes_0"), val = tensor([-1])]; tensor blocks_2_attn_ln_weight_to_fp16 = const()[name = string("blocks_2_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116142080)))]; tensor blocks_2_attn_ln_bias_to_fp16 = const()[name = string("blocks_2_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116143680)))]; fp16 var_528_to_fp16 = const()[name = string("op_528_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_540_cast_fp16 = layer_norm(axes = var_540_axes_0, beta = blocks_2_attn_ln_bias_to_fp16, epsilon = var_528_to_fp16, gamma = blocks_2_attn_ln_weight_to_fp16, x = x_39_cast_fp16)[name = string("op_540_cast_fp16")]; tensor var_551_to_fp16 = const()[name = string("op_551_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116145280)))]; tensor var_552_to_fp16 = const()[name = string("op_552_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117324992)))]; tensor linear_16_cast_fp16 = linear(bias = var_552_to_fp16, weight = var_551_to_fp16, x = var_540_cast_fp16)[name = string("linear_16_cast_fp16")]; tensor var_555_to_fp16 = const()[name = string("op_555_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117326592)))]; tensor linear_17_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_555_to_fp16, x = var_540_cast_fp16)[name = string("linear_17_cast_fp16")]; tensor var_559_to_fp16 = const()[name = string("op_559_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118506304)))]; tensor var_560_to_fp16 = const()[name = string("op_560_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119686016)))]; tensor linear_18_cast_fp16 = linear(bias = var_560_to_fp16, weight = var_559_to_fp16, x = var_540_cast_fp16)[name = string("linear_18_cast_fp16")]; tensor var_562_shape_cast_fp16 = shape(x = linear_16_cast_fp16)[name = string("op_562_shape_cast_fp16")]; int32 gather_26_axis_0 = const()[name = string("gather_26_axis_0"), val = int32(0)]; int32 gather_26_batch_dims_0 = const()[name = string("gather_26_batch_dims_0"), val = int32(0)]; bool gather_26_validate_indices_0 = const()[name = string("gather_26_validate_indices_0"), val = bool(false)]; string var_562_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_562_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_26_to_uint16 = const()[name = string("select_26_to_uint16"), val = uint16(1)]; tensor var_562_shape_cast_fp16_to_uint16 = cast(dtype = var_562_shape_cast_fp16_to_uint16_dtype_0, x = var_562_shape_cast_fp16)[name = string("cast_146")]; uint16 gather_26_cast_uint16 = gather(axis = gather_26_axis_0, batch_dims = gather_26_batch_dims_0, indices = select_26_to_uint16, validate_indices = gather_26_validate_indices_0, x = var_562_shape_cast_fp16_to_uint16)[name = string("gather_26_cast_uint16")]; string gather_26_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_26_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_26_cast_uint16_to_int32 = cast(dtype = gather_26_cast_uint16_to_int32_dtype_0, x = gather_26_cast_uint16)[name = string("cast_145")]; int32 end_step_7 = add(x = offset, y = gather_26_cast_uint16_to_int32)[name = string("end_step_7")]; tensor expand_dims_32 = const()[name = string("expand_dims_32"), val = tensor([0])]; tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([0])]; tensor expand_dims_35_axes_0 = const()[name = string("expand_dims_35_axes_0"), val = tensor([0])]; tensor expand_dims_35 = expand_dims(axes = expand_dims_35_axes_0, x = end_step_7)[name = string("expand_dims_35")]; tensor concat_48_values0_0 = const()[name = string("concat_48_values0_0"), val = tensor([2])]; int32 concat_48_axis_0 = const()[name = string("concat_48_axis_0"), val = int32(0)]; bool concat_48_interleave_0 = const()[name = string("concat_48_interleave_0"), val = bool(false)]; tensor concat_48 = concat(axis = concat_48_axis_0, interleave = concat_48_interleave_0, values = (concat_48_values0_0, expand_dims_32, expand_dims_1, expand_dims_34))[name = string("concat_48")]; tensor concat_49_values0_0 = const()[name = string("concat_49_values0_0"), val = tensor([0])]; tensor concat_49_values1_0 = const()[name = string("concat_49_values1_0"), val = tensor([0])]; tensor concat_49_values3_0 = const()[name = string("concat_49_values3_0"), val = tensor([0])]; int32 concat_49_axis_0 = const()[name = string("concat_49_axis_0"), val = int32(0)]; bool concat_49_interleave_0 = const()[name = string("concat_49_interleave_0"), val = bool(false)]; tensor concat_49 = concat(axis = concat_49_axis_0, interleave = concat_49_interleave_0, values = (concat_49_values0_0, concat_49_values1_0, expand_dims_35, concat_49_values3_0))[name = string("concat_49")]; tensor k_cache1_internal_tensor_assign_3_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_3_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_3_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_48, begin_mask = k_cache1_internal_tensor_assign_3_begin_mask_0, end = concat_49, end_mask = k_cache1_internal_tensor_assign_3_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_3_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_3_stride_0, update = linear_17_cast_fp16, x = coreml_update_state_26)[name = string("k_cache1_internal_tensor_assign_3_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_3_cast_fp16, input = k_cache1)[name = string("coreml_update_state_28_write_state")]; tensor coreml_update_state_28 = read_state(input = k_cache1)[name = string("coreml_update_state_28")]; tensor v_cache1_internal_tensor_assign_3_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_3_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_3_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_48, begin_mask = v_cache1_internal_tensor_assign_3_begin_mask_0, end = concat_49, end_mask = v_cache1_internal_tensor_assign_3_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_3_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_3_stride_0, update = linear_18_cast_fp16, x = coreml_update_state_27)[name = string("v_cache1_internal_tensor_assign_3_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_3_cast_fp16, input = v_cache1)[name = string("coreml_update_state_29_write_state")]; tensor coreml_update_state_29 = read_state(input = v_cache1)[name = string("coreml_update_state_29")]; int32 concat_54_values0_0 = const()[name = string("concat_54_values0_0"), val = int32(1)]; int32 concat_54_values2_0 = const()[name = string("concat_54_values2_0"), val = int32(768)]; int32 concat_54_axis_0 = const()[name = string("concat_54_axis_0"), val = int32(0)]; bool concat_54_interleave_0 = const()[name = string("concat_54_interleave_0"), val = bool(false)]; tensor concat_54 = concat(axis = concat_54_axis_0, interleave = concat_54_interleave_0, values = (concat_54_values0_0, end_step_7, concat_54_values2_0))[name = string("concat_54")]; tensor var_578_begin_0 = const()[name = string("op_578_begin_0"), val = tensor([0, 0, 0])]; tensor var_578_end_mask_0 = const()[name = string("op_578_end_mask_0"), val = tensor([true, false, true])]; tensor var_578_cast_fp16 = slice_by_index(begin = var_578_begin_0, end = concat_54, end_mask = var_578_end_mask_0, x = k_cache_9_cast_fp16)[name = string("op_578_cast_fp16")]; tensor var_581_begin_0 = const()[name = string("op_581_begin_0"), val = tensor([0, 0, 0])]; tensor var_581_end_mask_0 = const()[name = string("op_581_end_mask_0"), val = tensor([true, false, true])]; tensor var_581_cast_fp16 = slice_by_index(begin = var_581_begin_0, end = concat_54, end_mask = var_581_end_mask_0, x = v_cache_9_cast_fp16)[name = string("op_581_cast_fp16")]; tensor concat_56x = const()[name = string("concat_56x"), val = tensor([1, -1, 12, 64])]; tensor var_591_cast_fp16 = reshape(shape = concat_56x, x = linear_16_cast_fp16)[name = string("op_591_cast_fp16")]; tensor const_68_to_fp16 = const()[name = string("const_68_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_19_cast_fp16 = mul(x = var_591_cast_fp16, y = const_68_to_fp16)[name = string("q_19_cast_fp16")]; tensor concat_57x = const()[name = string("concat_57x"), val = tensor([1, -1, 12, 64])]; tensor var_598_cast_fp16 = reshape(shape = concat_57x, x = var_578_cast_fp16)[name = string("op_598_cast_fp16")]; tensor const_69_to_fp16 = const()[name = string("const_69_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_25_cast_fp16 = mul(x = var_598_cast_fp16, y = const_69_to_fp16)[name = string("k_25_cast_fp16")]; tensor concat_58x = const()[name = string("concat_58x"), val = tensor([1, -1, 12, 64])]; tensor var_605_cast_fp16 = reshape(shape = concat_58x, x = var_581_cast_fp16)[name = string("op_605_cast_fp16")]; tensor var_606 = const()[name = string("op_606"), val = tensor([0, 2, 1, 3])]; bool qk_13_transpose_x_0 = const()[name = string("qk_13_transpose_x_0"), val = bool(false)]; bool qk_13_transpose_y_0 = const()[name = string("qk_13_transpose_y_0"), val = bool(false)]; tensor transpose_105_perm_0 = const()[name = string("transpose_105_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_106_perm_0 = const()[name = string("transpose_106_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_106 = transpose(perm = transpose_106_perm_0, x = k_25_cast_fp16)[name = string("transpose_222")]; tensor transpose_105 = transpose(perm = transpose_105_perm_0, x = q_19_cast_fp16)[name = string("transpose_223")]; tensor qk_13_cast_fp16 = matmul(transpose_x = qk_13_transpose_x_0, transpose_y = qk_13_transpose_y_0, x = transpose_105, y = transpose_106)[name = string("qk_13_cast_fp16")]; int32 concat_59_values1_0 = const()[name = string("concat_59_values1_0"), val = int32(448)]; int32 concat_59_axis_0 = const()[name = string("concat_59_axis_0"), val = int32(0)]; bool concat_59_interleave_0 = const()[name = string("concat_59_interleave_0"), val = bool(false)]; tensor concat_59 = concat(axis = concat_59_axis_0, interleave = concat_59_interleave_0, values = (gather_26_cast_uint16_to_int32, concat_59_values1_0))[name = string("concat_59")]; tensor var_609_begin_0 = const()[name = string("op_609_begin_0"), val = tensor([0, 0])]; tensor var_609_end_mask_0 = const()[name = string("op_609_end_mask_0"), val = tensor([false, true])]; tensor var_609_cast_fp16 = slice_by_index(begin = var_609_begin_0, end = concat_59, end_mask = var_609_end_mask_0, x = mask_to_fp16)[name = string("op_609_cast_fp16")]; int32 concat_60_values0_0 = const()[name = string("concat_60_values0_0"), val = int32(0)]; int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)]; bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)]; tensor concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (concat_60_values0_0, gather_26_cast_uint16_to_int32))[name = string("concat_60")]; tensor var_610_begin_0 = const()[name = string("op_610_begin_0"), val = tensor([0, 0])]; tensor var_610_end_mask_0 = const()[name = string("op_610_end_mask_0"), val = tensor([true, false])]; tensor var_610_cast_fp16 = slice_by_index(begin = var_610_begin_0, end = concat_60, end_mask = var_610_end_mask_0, x = var_609_cast_fp16)[name = string("op_610_cast_fp16")]; tensor qk_15_cast_fp16 = add(x = qk_13_cast_fp16, y = var_610_cast_fp16)[name = string("qk_15_cast_fp16")]; tensor var_613_cast_fp16 = softmax(axis = var_522, x = qk_15_cast_fp16)[name = string("op_613_cast_fp16")]; bool var_615_transpose_x_0 = const()[name = string("op_615_transpose_x_0"), val = bool(false)]; bool var_615_transpose_y_0 = const()[name = string("op_615_transpose_y_0"), val = bool(false)]; tensor v_25_cast_fp16 = transpose(perm = var_606, x = var_605_cast_fp16)[name = string("transpose_224")]; tensor var_615_cast_fp16 = matmul(transpose_x = var_615_transpose_x_0, transpose_y = var_615_transpose_y_0, x = var_613_cast_fp16, y = v_25_cast_fp16)[name = string("op_615_cast_fp16")]; tensor var_616 = const()[name = string("op_616"), val = tensor([0, 2, 1, 3])]; tensor concat_61x = const()[name = string("concat_61x"), val = tensor([1, -1, 768])]; tensor var_617_cast_fp16 = transpose(perm = var_616, x = var_615_cast_fp16)[name = string("transpose_221")]; tensor x_43_cast_fp16 = reshape(shape = concat_61x, x = var_617_cast_fp16)[name = string("x_43_cast_fp16")]; tensor var_621_to_fp16 = const()[name = string("op_621_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119687616)))]; tensor var_622_to_fp16 = const()[name = string("op_622_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120867328)))]; tensor linear_19_cast_fp16 = linear(bias = var_622_to_fp16, weight = var_621_to_fp16, x = x_43_cast_fp16)[name = string("linear_19_cast_fp16")]; tensor x_45_cast_fp16 = add(x = x_39_cast_fp16, y = linear_19_cast_fp16)[name = string("x_45_cast_fp16")]; tensor var_629_axes_0 = const()[name = string("op_629_axes_0"), val = tensor([-1])]; tensor blocks_2_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_2_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120868928)))]; tensor blocks_2_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_2_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120870528)))]; tensor var_629_cast_fp16 = layer_norm(axes = var_629_axes_0, beta = blocks_2_cross_attn_ln_bias_to_fp16, epsilon = var_528_to_fp16, gamma = blocks_2_cross_attn_ln_weight_to_fp16, x = x_45_cast_fp16)[name = string("op_629_cast_fp16")]; tensor var_638_to_fp16 = const()[name = string("op_638_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120872128)))]; tensor var_639_to_fp16 = const()[name = string("op_639_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(122051840)))]; tensor linear_20_cast_fp16 = linear(bias = var_639_to_fp16, weight = var_638_to_fp16, x = var_629_cast_fp16)[name = string("linear_20_cast_fp16")]; tensor concat_62 = const()[name = string("concat_62"), val = tensor([0, 0, 0])]; tensor concat_63 = const()[name = string("concat_63"), val = tensor([0, 1500, 0])]; tensor k_27_internal_tensor_assign_1_stride_0 = const()[name = string("k_27_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_27_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_27_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_27_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_27_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_62, begin_mask = k_27_internal_tensor_assign_1_begin_mask_0, end = concat_63, end_mask = k_27_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_27_internal_tensor_assign_1_squeeze_mask_0, stride = k_27_internal_tensor_assign_1_stride_0, update = k_cache_11_cast_fp16, x = k_7_to_fp16)[name = string("k_27_internal_tensor_assign_1_cast_fp16")]; tensor concat_64 = const()[name = string("concat_64"), val = tensor([0, 0, 0])]; tensor concat_65 = const()[name = string("concat_65"), val = tensor([0, 1500, 0])]; tensor v_27_internal_tensor_assign_1_stride_0 = const()[name = string("v_27_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_27_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_27_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_27_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_27_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_64, begin_mask = v_27_internal_tensor_assign_1_begin_mask_0, end = concat_65, end_mask = v_27_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_27_internal_tensor_assign_1_squeeze_mask_0, stride = v_27_internal_tensor_assign_1_stride_0, update = v_cache_11_cast_fp16, x = k_7_to_fp16)[name = string("v_27_internal_tensor_assign_1_cast_fp16")]; tensor concat_66x = const()[name = string("concat_66x"), val = tensor([1, -1, 12, 64])]; tensor var_659_cast_fp16 = reshape(shape = concat_66x, x = linear_20_cast_fp16)[name = string("op_659_cast_fp16")]; tensor const_70_to_fp16 = const()[name = string("const_70_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_23_cast_fp16 = mul(x = var_659_cast_fp16, y = const_70_to_fp16)[name = string("q_23_cast_fp16")]; tensor var_665 = const()[name = string("op_665"), val = tensor([1, 1500, 12, -1])]; tensor var_666_cast_fp16 = reshape(shape = var_665, x = k_27_internal_tensor_assign_1_cast_fp16)[name = string("op_666_cast_fp16")]; tensor const_71_to_fp16 = const()[name = string("const_71_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_29_cast_fp16 = mul(x = var_666_cast_fp16, y = const_71_to_fp16)[name = string("k_29_cast_fp16")]; tensor var_672 = const()[name = string("op_672"), val = tensor([1, 1500, 12, -1])]; tensor var_673_cast_fp16 = reshape(shape = var_672, x = v_27_internal_tensor_assign_1_cast_fp16)[name = string("op_673_cast_fp16")]; tensor var_674 = const()[name = string("op_674"), val = tensor([0, 2, 1, 3])]; bool qk_17_transpose_x_0 = const()[name = string("qk_17_transpose_x_0"), val = bool(false)]; bool qk_17_transpose_y_0 = const()[name = string("qk_17_transpose_y_0"), val = bool(false)]; tensor transpose_107_perm_0 = const()[name = string("transpose_107_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_108_perm_0 = const()[name = string("transpose_108_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_108 = transpose(perm = transpose_108_perm_0, x = k_29_cast_fp16)[name = string("transpose_218")]; tensor transpose_107 = transpose(perm = transpose_107_perm_0, x = q_23_cast_fp16)[name = string("transpose_219")]; tensor qk_17_cast_fp16 = matmul(transpose_x = qk_17_transpose_x_0, transpose_y = qk_17_transpose_y_0, x = transpose_107, y = transpose_108)[name = string("qk_17_cast_fp16")]; tensor var_678_cast_fp16 = softmax(axis = var_522, x = qk_17_cast_fp16)[name = string("op_678_cast_fp16")]; bool var_680_transpose_x_0 = const()[name = string("op_680_transpose_x_0"), val = bool(false)]; bool var_680_transpose_y_0 = const()[name = string("op_680_transpose_y_0"), val = bool(false)]; tensor v_29_cast_fp16 = transpose(perm = var_674, x = var_673_cast_fp16)[name = string("transpose_220")]; tensor var_680_cast_fp16 = matmul(transpose_x = var_680_transpose_x_0, transpose_y = var_680_transpose_y_0, x = var_678_cast_fp16, y = v_29_cast_fp16)[name = string("op_680_cast_fp16")]; tensor var_681 = const()[name = string("op_681"), val = tensor([0, 2, 1, 3])]; tensor concat_67x = const()[name = string("concat_67x"), val = tensor([1, -1, 768])]; tensor var_682_cast_fp16 = transpose(perm = var_681, x = var_680_cast_fp16)[name = string("transpose_217")]; tensor x_49_cast_fp16 = reshape(shape = concat_67x, x = var_682_cast_fp16)[name = string("x_49_cast_fp16")]; tensor var_686_to_fp16 = const()[name = string("op_686_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(122053440)))]; tensor var_687_to_fp16 = const()[name = string("op_687_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123233152)))]; tensor linear_21_cast_fp16 = linear(bias = var_687_to_fp16, weight = var_686_to_fp16, x = x_49_cast_fp16)[name = string("linear_21_cast_fp16")]; tensor x_51_cast_fp16 = add(x = x_45_cast_fp16, y = linear_21_cast_fp16)[name = string("x_51_cast_fp16")]; tensor var_694_axes_0 = const()[name = string("op_694_axes_0"), val = tensor([-1])]; tensor blocks_2_mlp_ln_weight_to_fp16 = const()[name = string("blocks_2_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123234752)))]; tensor blocks_2_mlp_ln_bias_to_fp16 = const()[name = string("blocks_2_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123236352)))]; tensor var_694_cast_fp16 = layer_norm(axes = var_694_axes_0, beta = blocks_2_mlp_ln_bias_to_fp16, epsilon = var_528_to_fp16, gamma = blocks_2_mlp_ln_weight_to_fp16, x = x_51_cast_fp16)[name = string("op_694_cast_fp16")]; tensor var_703_to_fp16 = const()[name = string("op_703_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123237952)))]; tensor var_704_to_fp16 = const()[name = string("op_704_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127956608)))]; tensor linear_22_cast_fp16 = linear(bias = var_704_to_fp16, weight = var_703_to_fp16, x = var_694_cast_fp16)[name = string("linear_22_cast_fp16")]; string x_55_mode_0 = const()[name = string("x_55_mode_0"), val = string("EXACT")]; tensor x_55_cast_fp16 = gelu(mode = x_55_mode_0, x = linear_22_cast_fp16)[name = string("x_55_cast_fp16")]; tensor var_709_to_fp16 = const()[name = string("op_709_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127962816)))]; tensor var_710_to_fp16 = const()[name = string("op_710_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132681472)))]; tensor linear_23_cast_fp16 = linear(bias = var_710_to_fp16, weight = var_709_to_fp16, x = x_55_cast_fp16)[name = string("linear_23_cast_fp16")]; tensor x_57_cast_fp16 = add(x = x_51_cast_fp16, y = linear_23_cast_fp16)[name = string("x_57_cast_fp16")]; tensor k_cache_13_begin_0 = const()[name = string("k_cache_13_begin_0"), val = tensor([3, 0, 0, 0])]; tensor k_cache_13_end_0 = const()[name = string("k_cache_13_end_0"), val = tensor([4, 1, 448, 768])]; tensor k_cache_13_end_mask_0 = const()[name = string("k_cache_13_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_13_squeeze_mask_0 = const()[name = string("k_cache_13_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_13_cast_fp16 = slice_by_index(begin = k_cache_13_begin_0, end = k_cache_13_end_0, end_mask = k_cache_13_end_mask_0, squeeze_mask = k_cache_13_squeeze_mask_0, x = coreml_update_state_28)[name = string("k_cache_13_cast_fp16")]; tensor v_cache_13_begin_0 = const()[name = string("v_cache_13_begin_0"), val = tensor([3, 0, 0, 0])]; tensor v_cache_13_end_0 = const()[name = string("v_cache_13_end_0"), val = tensor([4, 1, 448, 768])]; tensor v_cache_13_end_mask_0 = const()[name = string("v_cache_13_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_13_squeeze_mask_0 = const()[name = string("v_cache_13_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_13_cast_fp16 = slice_by_index(begin = v_cache_13_begin_0, end = v_cache_13_end_0, end_mask = v_cache_13_end_mask_0, squeeze_mask = v_cache_13_squeeze_mask_0, x = coreml_update_state_29)[name = string("v_cache_13_cast_fp16")]; tensor k_cache_15_begin_0 = const()[name = string("k_cache_15_begin_0"), val = tensor([3, 0, 0, 0])]; tensor k_cache_15_end_0 = const()[name = string("k_cache_15_end_0"), val = tensor([4, 1, 1500, 768])]; tensor k_cache_15_end_mask_0 = const()[name = string("k_cache_15_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_15_squeeze_mask_0 = const()[name = string("k_cache_15_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_15_cast_fp16 = slice_by_index(begin = k_cache_15_begin_0, end = k_cache_15_end_0, end_mask = k_cache_15_end_mask_0, squeeze_mask = k_cache_15_squeeze_mask_0, x = read_state_2)[name = string("k_cache_15_cast_fp16")]; tensor v_cache_15_begin_0 = const()[name = string("v_cache_15_begin_0"), val = tensor([3, 0, 0, 0])]; tensor v_cache_15_end_0 = const()[name = string("v_cache_15_end_0"), val = tensor([4, 1, 1500, 768])]; tensor v_cache_15_end_mask_0 = const()[name = string("v_cache_15_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_15_squeeze_mask_0 = const()[name = string("v_cache_15_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_15_cast_fp16 = slice_by_index(begin = v_cache_15_begin_0, end = v_cache_15_end_0, end_mask = v_cache_15_end_mask_0, squeeze_mask = v_cache_15_squeeze_mask_0, x = read_state_3)[name = string("v_cache_15_cast_fp16")]; int32 var_733 = const()[name = string("op_733"), val = int32(-1)]; tensor var_751_axes_0 = const()[name = string("op_751_axes_0"), val = tensor([-1])]; tensor blocks_3_attn_ln_weight_to_fp16 = const()[name = string("blocks_3_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132683072)))]; tensor blocks_3_attn_ln_bias_to_fp16 = const()[name = string("blocks_3_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132684672)))]; fp16 var_739_to_fp16 = const()[name = string("op_739_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_751_cast_fp16 = layer_norm(axes = var_751_axes_0, beta = blocks_3_attn_ln_bias_to_fp16, epsilon = var_739_to_fp16, gamma = blocks_3_attn_ln_weight_to_fp16, x = x_57_cast_fp16)[name = string("op_751_cast_fp16")]; tensor var_762_to_fp16 = const()[name = string("op_762_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132686272)))]; tensor var_763_to_fp16 = const()[name = string("op_763_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133865984)))]; tensor linear_24_cast_fp16 = linear(bias = var_763_to_fp16, weight = var_762_to_fp16, x = var_751_cast_fp16)[name = string("linear_24_cast_fp16")]; tensor var_766_to_fp16 = const()[name = string("op_766_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133867584)))]; tensor linear_25_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_766_to_fp16, x = var_751_cast_fp16)[name = string("linear_25_cast_fp16")]; tensor var_770_to_fp16 = const()[name = string("op_770_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135047296)))]; tensor var_771_to_fp16 = const()[name = string("op_771_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136227008)))]; tensor linear_26_cast_fp16 = linear(bias = var_771_to_fp16, weight = var_770_to_fp16, x = var_751_cast_fp16)[name = string("linear_26_cast_fp16")]; tensor var_773_shape_cast_fp16 = shape(x = linear_24_cast_fp16)[name = string("op_773_shape_cast_fp16")]; int32 gather_38_axis_0 = const()[name = string("gather_38_axis_0"), val = int32(0)]; int32 gather_38_batch_dims_0 = const()[name = string("gather_38_batch_dims_0"), val = int32(0)]; bool gather_38_validate_indices_0 = const()[name = string("gather_38_validate_indices_0"), val = bool(false)]; string var_773_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_773_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_38_to_uint16 = const()[name = string("select_38_to_uint16"), val = uint16(1)]; tensor var_773_shape_cast_fp16_to_uint16 = cast(dtype = var_773_shape_cast_fp16_to_uint16_dtype_0, x = var_773_shape_cast_fp16)[name = string("cast_144")]; uint16 gather_38_cast_uint16 = gather(axis = gather_38_axis_0, batch_dims = gather_38_batch_dims_0, indices = select_38_to_uint16, validate_indices = gather_38_validate_indices_0, x = var_773_shape_cast_fp16_to_uint16)[name = string("gather_38_cast_uint16")]; string gather_38_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_38_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_38_cast_uint16_to_int32 = cast(dtype = gather_38_cast_uint16_to_int32_dtype_0, x = gather_38_cast_uint16)[name = string("cast_143")]; int32 end_step_9 = add(x = offset, y = gather_38_cast_uint16_to_int32)[name = string("end_step_9")]; tensor expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor([0])]; tensor expand_dims_50 = const()[name = string("expand_dims_50"), val = tensor([0])]; tensor expand_dims_51_axes_0 = const()[name = string("expand_dims_51_axes_0"), val = tensor([0])]; tensor expand_dims_51 = expand_dims(axes = expand_dims_51_axes_0, x = end_step_9)[name = string("expand_dims_51")]; tensor concat_70_values0_0 = const()[name = string("concat_70_values0_0"), val = tensor([3])]; int32 concat_70_axis_0 = const()[name = string("concat_70_axis_0"), val = int32(0)]; bool concat_70_interleave_0 = const()[name = string("concat_70_interleave_0"), val = bool(false)]; tensor concat_70 = concat(axis = concat_70_axis_0, interleave = concat_70_interleave_0, values = (concat_70_values0_0, expand_dims_48, expand_dims_1, expand_dims_50))[name = string("concat_70")]; tensor concat_71_values0_0 = const()[name = string("concat_71_values0_0"), val = tensor([0])]; tensor concat_71_values1_0 = const()[name = string("concat_71_values1_0"), val = tensor([0])]; tensor concat_71_values3_0 = const()[name = string("concat_71_values3_0"), val = tensor([0])]; int32 concat_71_axis_0 = const()[name = string("concat_71_axis_0"), val = int32(0)]; bool concat_71_interleave_0 = const()[name = string("concat_71_interleave_0"), val = bool(false)]; tensor concat_71 = concat(axis = concat_71_axis_0, interleave = concat_71_interleave_0, values = (concat_71_values0_0, concat_71_values1_0, expand_dims_51, concat_71_values3_0))[name = string("concat_71")]; tensor k_cache1_internal_tensor_assign_4_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_4_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_4_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_70, begin_mask = k_cache1_internal_tensor_assign_4_begin_mask_0, end = concat_71, end_mask = k_cache1_internal_tensor_assign_4_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_4_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_4_stride_0, update = linear_25_cast_fp16, x = coreml_update_state_28)[name = string("k_cache1_internal_tensor_assign_4_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_4_cast_fp16, input = k_cache1)[name = string("coreml_update_state_30_write_state")]; tensor coreml_update_state_30 = read_state(input = k_cache1)[name = string("coreml_update_state_30")]; tensor v_cache1_internal_tensor_assign_4_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_4_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_4_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_70, begin_mask = v_cache1_internal_tensor_assign_4_begin_mask_0, end = concat_71, end_mask = v_cache1_internal_tensor_assign_4_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_4_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_4_stride_0, update = linear_26_cast_fp16, x = coreml_update_state_29)[name = string("v_cache1_internal_tensor_assign_4_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_4_cast_fp16, input = v_cache1)[name = string("coreml_update_state_31_write_state")]; tensor coreml_update_state_31 = read_state(input = v_cache1)[name = string("coreml_update_state_31")]; int32 concat_76_values0_0 = const()[name = string("concat_76_values0_0"), val = int32(1)]; int32 concat_76_values2_0 = const()[name = string("concat_76_values2_0"), val = int32(768)]; int32 concat_76_axis_0 = const()[name = string("concat_76_axis_0"), val = int32(0)]; bool concat_76_interleave_0 = const()[name = string("concat_76_interleave_0"), val = bool(false)]; tensor concat_76 = concat(axis = concat_76_axis_0, interleave = concat_76_interleave_0, values = (concat_76_values0_0, end_step_9, concat_76_values2_0))[name = string("concat_76")]; tensor var_789_begin_0 = const()[name = string("op_789_begin_0"), val = tensor([0, 0, 0])]; tensor var_789_end_mask_0 = const()[name = string("op_789_end_mask_0"), val = tensor([true, false, true])]; tensor var_789_cast_fp16 = slice_by_index(begin = var_789_begin_0, end = concat_76, end_mask = var_789_end_mask_0, x = k_cache_13_cast_fp16)[name = string("op_789_cast_fp16")]; tensor var_792_begin_0 = const()[name = string("op_792_begin_0"), val = tensor([0, 0, 0])]; tensor var_792_end_mask_0 = const()[name = string("op_792_end_mask_0"), val = tensor([true, false, true])]; tensor var_792_cast_fp16 = slice_by_index(begin = var_792_begin_0, end = concat_76, end_mask = var_792_end_mask_0, x = v_cache_13_cast_fp16)[name = string("op_792_cast_fp16")]; tensor concat_78x = const()[name = string("concat_78x"), val = tensor([1, -1, 12, 64])]; tensor var_802_cast_fp16 = reshape(shape = concat_78x, x = linear_24_cast_fp16)[name = string("op_802_cast_fp16")]; tensor const_72_to_fp16 = const()[name = string("const_72_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_27_cast_fp16 = mul(x = var_802_cast_fp16, y = const_72_to_fp16)[name = string("q_27_cast_fp16")]; tensor concat_79x = const()[name = string("concat_79x"), val = tensor([1, -1, 12, 64])]; tensor var_809_cast_fp16 = reshape(shape = concat_79x, x = var_789_cast_fp16)[name = string("op_809_cast_fp16")]; tensor const_73_to_fp16 = const()[name = string("const_73_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_35_cast_fp16 = mul(x = var_809_cast_fp16, y = const_73_to_fp16)[name = string("k_35_cast_fp16")]; tensor concat_80x = const()[name = string("concat_80x"), val = tensor([1, -1, 12, 64])]; tensor var_816_cast_fp16 = reshape(shape = concat_80x, x = var_792_cast_fp16)[name = string("op_816_cast_fp16")]; tensor var_817 = const()[name = string("op_817"), val = tensor([0, 2, 1, 3])]; bool qk_19_transpose_x_0 = const()[name = string("qk_19_transpose_x_0"), val = bool(false)]; bool qk_19_transpose_y_0 = const()[name = string("qk_19_transpose_y_0"), val = bool(false)]; tensor transpose_109_perm_0 = const()[name = string("transpose_109_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_110_perm_0 = const()[name = string("transpose_110_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_110 = transpose(perm = transpose_110_perm_0, x = k_35_cast_fp16)[name = string("transpose_214")]; tensor transpose_109 = transpose(perm = transpose_109_perm_0, x = q_27_cast_fp16)[name = string("transpose_215")]; tensor qk_19_cast_fp16 = matmul(transpose_x = qk_19_transpose_x_0, transpose_y = qk_19_transpose_y_0, x = transpose_109, y = transpose_110)[name = string("qk_19_cast_fp16")]; int32 concat_81_values1_0 = const()[name = string("concat_81_values1_0"), val = int32(448)]; int32 concat_81_axis_0 = const()[name = string("concat_81_axis_0"), val = int32(0)]; bool concat_81_interleave_0 = const()[name = string("concat_81_interleave_0"), val = bool(false)]; tensor concat_81 = concat(axis = concat_81_axis_0, interleave = concat_81_interleave_0, values = (gather_38_cast_uint16_to_int32, concat_81_values1_0))[name = string("concat_81")]; tensor var_820_begin_0 = const()[name = string("op_820_begin_0"), val = tensor([0, 0])]; tensor var_820_end_mask_0 = const()[name = string("op_820_end_mask_0"), val = tensor([false, true])]; tensor var_820_cast_fp16 = slice_by_index(begin = var_820_begin_0, end = concat_81, end_mask = var_820_end_mask_0, x = mask_to_fp16)[name = string("op_820_cast_fp16")]; int32 concat_82_values0_0 = const()[name = string("concat_82_values0_0"), val = int32(0)]; int32 concat_82_axis_0 = const()[name = string("concat_82_axis_0"), val = int32(0)]; bool concat_82_interleave_0 = const()[name = string("concat_82_interleave_0"), val = bool(false)]; tensor concat_82 = concat(axis = concat_82_axis_0, interleave = concat_82_interleave_0, values = (concat_82_values0_0, gather_38_cast_uint16_to_int32))[name = string("concat_82")]; tensor var_821_begin_0 = const()[name = string("op_821_begin_0"), val = tensor([0, 0])]; tensor var_821_end_mask_0 = const()[name = string("op_821_end_mask_0"), val = tensor([true, false])]; tensor var_821_cast_fp16 = slice_by_index(begin = var_821_begin_0, end = concat_82, end_mask = var_821_end_mask_0, x = var_820_cast_fp16)[name = string("op_821_cast_fp16")]; tensor qk_21_cast_fp16 = add(x = qk_19_cast_fp16, y = var_821_cast_fp16)[name = string("qk_21_cast_fp16")]; tensor var_824_cast_fp16 = softmax(axis = var_733, x = qk_21_cast_fp16)[name = string("op_824_cast_fp16")]; bool var_826_transpose_x_0 = const()[name = string("op_826_transpose_x_0"), val = bool(false)]; bool var_826_transpose_y_0 = const()[name = string("op_826_transpose_y_0"), val = bool(false)]; tensor v_35_cast_fp16 = transpose(perm = var_817, x = var_816_cast_fp16)[name = string("transpose_216")]; tensor var_826_cast_fp16 = matmul(transpose_x = var_826_transpose_x_0, transpose_y = var_826_transpose_y_0, x = var_824_cast_fp16, y = v_35_cast_fp16)[name = string("op_826_cast_fp16")]; tensor var_827 = const()[name = string("op_827"), val = tensor([0, 2, 1, 3])]; tensor concat_83x = const()[name = string("concat_83x"), val = tensor([1, -1, 768])]; tensor var_828_cast_fp16 = transpose(perm = var_827, x = var_826_cast_fp16)[name = string("transpose_213")]; tensor x_61_cast_fp16 = reshape(shape = concat_83x, x = var_828_cast_fp16)[name = string("x_61_cast_fp16")]; tensor var_832_to_fp16 = const()[name = string("op_832_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136228608)))]; tensor var_833_to_fp16 = const()[name = string("op_833_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137408320)))]; tensor linear_27_cast_fp16 = linear(bias = var_833_to_fp16, weight = var_832_to_fp16, x = x_61_cast_fp16)[name = string("linear_27_cast_fp16")]; tensor x_63_cast_fp16 = add(x = x_57_cast_fp16, y = linear_27_cast_fp16)[name = string("x_63_cast_fp16")]; tensor var_840_axes_0 = const()[name = string("op_840_axes_0"), val = tensor([-1])]; tensor blocks_3_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_3_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137409920)))]; tensor blocks_3_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_3_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137411520)))]; tensor var_840_cast_fp16 = layer_norm(axes = var_840_axes_0, beta = blocks_3_cross_attn_ln_bias_to_fp16, epsilon = var_739_to_fp16, gamma = blocks_3_cross_attn_ln_weight_to_fp16, x = x_63_cast_fp16)[name = string("op_840_cast_fp16")]; tensor var_849_to_fp16 = const()[name = string("op_849_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137413120)))]; tensor var_850_to_fp16 = const()[name = string("op_850_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138592832)))]; tensor linear_28_cast_fp16 = linear(bias = var_850_to_fp16, weight = var_849_to_fp16, x = var_840_cast_fp16)[name = string("linear_28_cast_fp16")]; tensor concat_84 = const()[name = string("concat_84"), val = tensor([0, 0, 0])]; tensor concat_85 = const()[name = string("concat_85"), val = tensor([0, 1500, 0])]; tensor k_37_internal_tensor_assign_1_stride_0 = const()[name = string("k_37_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_37_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_37_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_37_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_37_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_84, begin_mask = k_37_internal_tensor_assign_1_begin_mask_0, end = concat_85, end_mask = k_37_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_37_internal_tensor_assign_1_squeeze_mask_0, stride = k_37_internal_tensor_assign_1_stride_0, update = k_cache_15_cast_fp16, x = k_7_to_fp16)[name = string("k_37_internal_tensor_assign_1_cast_fp16")]; tensor concat_86 = const()[name = string("concat_86"), val = tensor([0, 0, 0])]; tensor concat_87 = const()[name = string("concat_87"), val = tensor([0, 1500, 0])]; tensor v_37_internal_tensor_assign_1_stride_0 = const()[name = string("v_37_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_37_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_37_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_37_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_37_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_86, begin_mask = v_37_internal_tensor_assign_1_begin_mask_0, end = concat_87, end_mask = v_37_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_37_internal_tensor_assign_1_squeeze_mask_0, stride = v_37_internal_tensor_assign_1_stride_0, update = v_cache_15_cast_fp16, x = k_7_to_fp16)[name = string("v_37_internal_tensor_assign_1_cast_fp16")]; tensor concat_88x = const()[name = string("concat_88x"), val = tensor([1, -1, 12, 64])]; tensor var_870_cast_fp16 = reshape(shape = concat_88x, x = linear_28_cast_fp16)[name = string("op_870_cast_fp16")]; tensor const_74_to_fp16 = const()[name = string("const_74_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_31_cast_fp16 = mul(x = var_870_cast_fp16, y = const_74_to_fp16)[name = string("q_31_cast_fp16")]; tensor var_876 = const()[name = string("op_876"), val = tensor([1, 1500, 12, -1])]; tensor var_877_cast_fp16 = reshape(shape = var_876, x = k_37_internal_tensor_assign_1_cast_fp16)[name = string("op_877_cast_fp16")]; tensor const_75_to_fp16 = const()[name = string("const_75_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_39_cast_fp16 = mul(x = var_877_cast_fp16, y = const_75_to_fp16)[name = string("k_39_cast_fp16")]; tensor var_883 = const()[name = string("op_883"), val = tensor([1, 1500, 12, -1])]; tensor var_884_cast_fp16 = reshape(shape = var_883, x = v_37_internal_tensor_assign_1_cast_fp16)[name = string("op_884_cast_fp16")]; tensor var_885 = const()[name = string("op_885"), val = tensor([0, 2, 1, 3])]; bool qk_23_transpose_x_0 = const()[name = string("qk_23_transpose_x_0"), val = bool(false)]; bool qk_23_transpose_y_0 = const()[name = string("qk_23_transpose_y_0"), val = bool(false)]; tensor transpose_111_perm_0 = const()[name = string("transpose_111_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_112_perm_0 = const()[name = string("transpose_112_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_112 = transpose(perm = transpose_112_perm_0, x = k_39_cast_fp16)[name = string("transpose_210")]; tensor transpose_111 = transpose(perm = transpose_111_perm_0, x = q_31_cast_fp16)[name = string("transpose_211")]; tensor qk_23_cast_fp16 = matmul(transpose_x = qk_23_transpose_x_0, transpose_y = qk_23_transpose_y_0, x = transpose_111, y = transpose_112)[name = string("qk_23_cast_fp16")]; tensor var_889_cast_fp16 = softmax(axis = var_733, x = qk_23_cast_fp16)[name = string("op_889_cast_fp16")]; bool var_891_transpose_x_0 = const()[name = string("op_891_transpose_x_0"), val = bool(false)]; bool var_891_transpose_y_0 = const()[name = string("op_891_transpose_y_0"), val = bool(false)]; tensor v_39_cast_fp16 = transpose(perm = var_885, x = var_884_cast_fp16)[name = string("transpose_212")]; tensor var_891_cast_fp16 = matmul(transpose_x = var_891_transpose_x_0, transpose_y = var_891_transpose_y_0, x = var_889_cast_fp16, y = v_39_cast_fp16)[name = string("op_891_cast_fp16")]; tensor var_892 = const()[name = string("op_892"), val = tensor([0, 2, 1, 3])]; tensor concat_89x = const()[name = string("concat_89x"), val = tensor([1, -1, 768])]; tensor var_893_cast_fp16 = transpose(perm = var_892, x = var_891_cast_fp16)[name = string("transpose_209")]; tensor x_67_cast_fp16 = reshape(shape = concat_89x, x = var_893_cast_fp16)[name = string("x_67_cast_fp16")]; tensor var_897_to_fp16 = const()[name = string("op_897_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138594432)))]; tensor var_898_to_fp16 = const()[name = string("op_898_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139774144)))]; tensor linear_29_cast_fp16 = linear(bias = var_898_to_fp16, weight = var_897_to_fp16, x = x_67_cast_fp16)[name = string("linear_29_cast_fp16")]; tensor x_69_cast_fp16 = add(x = x_63_cast_fp16, y = linear_29_cast_fp16)[name = string("x_69_cast_fp16")]; tensor var_905_axes_0 = const()[name = string("op_905_axes_0"), val = tensor([-1])]; tensor blocks_3_mlp_ln_weight_to_fp16 = const()[name = string("blocks_3_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139775744)))]; tensor blocks_3_mlp_ln_bias_to_fp16 = const()[name = string("blocks_3_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139777344)))]; tensor var_905_cast_fp16 = layer_norm(axes = var_905_axes_0, beta = blocks_3_mlp_ln_bias_to_fp16, epsilon = var_739_to_fp16, gamma = blocks_3_mlp_ln_weight_to_fp16, x = x_69_cast_fp16)[name = string("op_905_cast_fp16")]; tensor var_914_to_fp16 = const()[name = string("op_914_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139778944)))]; tensor var_915_to_fp16 = const()[name = string("op_915_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144497600)))]; tensor linear_30_cast_fp16 = linear(bias = var_915_to_fp16, weight = var_914_to_fp16, x = var_905_cast_fp16)[name = string("linear_30_cast_fp16")]; string x_73_mode_0 = const()[name = string("x_73_mode_0"), val = string("EXACT")]; tensor x_73_cast_fp16 = gelu(mode = x_73_mode_0, x = linear_30_cast_fp16)[name = string("x_73_cast_fp16")]; tensor var_920_to_fp16 = const()[name = string("op_920_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144503808)))]; tensor var_921_to_fp16 = const()[name = string("op_921_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149222464)))]; tensor linear_31_cast_fp16 = linear(bias = var_921_to_fp16, weight = var_920_to_fp16, x = x_73_cast_fp16)[name = string("linear_31_cast_fp16")]; tensor x_75_cast_fp16 = add(x = x_69_cast_fp16, y = linear_31_cast_fp16)[name = string("x_75_cast_fp16")]; tensor k_cache_17_begin_0 = const()[name = string("k_cache_17_begin_0"), val = tensor([4, 0, 0, 0])]; tensor k_cache_17_end_0 = const()[name = string("k_cache_17_end_0"), val = tensor([5, 1, 448, 768])]; tensor k_cache_17_end_mask_0 = const()[name = string("k_cache_17_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_17_squeeze_mask_0 = const()[name = string("k_cache_17_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_17_cast_fp16 = slice_by_index(begin = k_cache_17_begin_0, end = k_cache_17_end_0, end_mask = k_cache_17_end_mask_0, squeeze_mask = k_cache_17_squeeze_mask_0, x = coreml_update_state_30)[name = string("k_cache_17_cast_fp16")]; tensor v_cache_17_begin_0 = const()[name = string("v_cache_17_begin_0"), val = tensor([4, 0, 0, 0])]; tensor v_cache_17_end_0 = const()[name = string("v_cache_17_end_0"), val = tensor([5, 1, 448, 768])]; tensor v_cache_17_end_mask_0 = const()[name = string("v_cache_17_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_17_squeeze_mask_0 = const()[name = string("v_cache_17_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_17_cast_fp16 = slice_by_index(begin = v_cache_17_begin_0, end = v_cache_17_end_0, end_mask = v_cache_17_end_mask_0, squeeze_mask = v_cache_17_squeeze_mask_0, x = coreml_update_state_31)[name = string("v_cache_17_cast_fp16")]; tensor k_cache_19_begin_0 = const()[name = string("k_cache_19_begin_0"), val = tensor([4, 0, 0, 0])]; tensor k_cache_19_end_0 = const()[name = string("k_cache_19_end_0"), val = tensor([5, 1, 1500, 768])]; tensor k_cache_19_end_mask_0 = const()[name = string("k_cache_19_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_19_squeeze_mask_0 = const()[name = string("k_cache_19_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_19_cast_fp16 = slice_by_index(begin = k_cache_19_begin_0, end = k_cache_19_end_0, end_mask = k_cache_19_end_mask_0, squeeze_mask = k_cache_19_squeeze_mask_0, x = read_state_2)[name = string("k_cache_19_cast_fp16")]; tensor v_cache_19_begin_0 = const()[name = string("v_cache_19_begin_0"), val = tensor([4, 0, 0, 0])]; tensor v_cache_19_end_0 = const()[name = string("v_cache_19_end_0"), val = tensor([5, 1, 1500, 768])]; tensor v_cache_19_end_mask_0 = const()[name = string("v_cache_19_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_19_squeeze_mask_0 = const()[name = string("v_cache_19_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_19_cast_fp16 = slice_by_index(begin = v_cache_19_begin_0, end = v_cache_19_end_0, end_mask = v_cache_19_end_mask_0, squeeze_mask = v_cache_19_squeeze_mask_0, x = read_state_3)[name = string("v_cache_19_cast_fp16")]; int32 var_944 = const()[name = string("op_944"), val = int32(-1)]; tensor var_962_axes_0 = const()[name = string("op_962_axes_0"), val = tensor([-1])]; tensor blocks_4_attn_ln_weight_to_fp16 = const()[name = string("blocks_4_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149224064)))]; tensor blocks_4_attn_ln_bias_to_fp16 = const()[name = string("blocks_4_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149225664)))]; fp16 var_950_to_fp16 = const()[name = string("op_950_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_962_cast_fp16 = layer_norm(axes = var_962_axes_0, beta = blocks_4_attn_ln_bias_to_fp16, epsilon = var_950_to_fp16, gamma = blocks_4_attn_ln_weight_to_fp16, x = x_75_cast_fp16)[name = string("op_962_cast_fp16")]; tensor var_973_to_fp16 = const()[name = string("op_973_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149227264)))]; tensor var_974_to_fp16 = const()[name = string("op_974_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(150406976)))]; tensor linear_32_cast_fp16 = linear(bias = var_974_to_fp16, weight = var_973_to_fp16, x = var_962_cast_fp16)[name = string("linear_32_cast_fp16")]; tensor var_977_to_fp16 = const()[name = string("op_977_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(150408576)))]; tensor linear_33_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_977_to_fp16, x = var_962_cast_fp16)[name = string("linear_33_cast_fp16")]; tensor var_981_to_fp16 = const()[name = string("op_981_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151588288)))]; tensor var_982_to_fp16 = const()[name = string("op_982_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152768000)))]; tensor linear_34_cast_fp16 = linear(bias = var_982_to_fp16, weight = var_981_to_fp16, x = var_962_cast_fp16)[name = string("linear_34_cast_fp16")]; tensor var_984_shape_cast_fp16 = shape(x = linear_32_cast_fp16)[name = string("op_984_shape_cast_fp16")]; int32 gather_50_axis_0 = const()[name = string("gather_50_axis_0"), val = int32(0)]; int32 gather_50_batch_dims_0 = const()[name = string("gather_50_batch_dims_0"), val = int32(0)]; bool gather_50_validate_indices_0 = const()[name = string("gather_50_validate_indices_0"), val = bool(false)]; string var_984_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_984_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_50_to_uint16 = const()[name = string("select_50_to_uint16"), val = uint16(1)]; tensor var_984_shape_cast_fp16_to_uint16 = cast(dtype = var_984_shape_cast_fp16_to_uint16_dtype_0, x = var_984_shape_cast_fp16)[name = string("cast_142")]; uint16 gather_50_cast_uint16 = gather(axis = gather_50_axis_0, batch_dims = gather_50_batch_dims_0, indices = select_50_to_uint16, validate_indices = gather_50_validate_indices_0, x = var_984_shape_cast_fp16_to_uint16)[name = string("gather_50_cast_uint16")]; string gather_50_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_50_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_50_cast_uint16_to_int32 = cast(dtype = gather_50_cast_uint16_to_int32_dtype_0, x = gather_50_cast_uint16)[name = string("cast_141")]; int32 end_step_11 = add(x = offset, y = gather_50_cast_uint16_to_int32)[name = string("end_step_11")]; tensor expand_dims_64 = const()[name = string("expand_dims_64"), val = tensor([0])]; tensor expand_dims_66 = const()[name = string("expand_dims_66"), val = tensor([0])]; tensor expand_dims_67_axes_0 = const()[name = string("expand_dims_67_axes_0"), val = tensor([0])]; tensor expand_dims_67 = expand_dims(axes = expand_dims_67_axes_0, x = end_step_11)[name = string("expand_dims_67")]; tensor concat_92_values0_0 = const()[name = string("concat_92_values0_0"), val = tensor([4])]; int32 concat_92_axis_0 = const()[name = string("concat_92_axis_0"), val = int32(0)]; bool concat_92_interleave_0 = const()[name = string("concat_92_interleave_0"), val = bool(false)]; tensor concat_92 = concat(axis = concat_92_axis_0, interleave = concat_92_interleave_0, values = (concat_92_values0_0, expand_dims_64, expand_dims_1, expand_dims_66))[name = string("concat_92")]; tensor concat_93_values0_0 = const()[name = string("concat_93_values0_0"), val = tensor([0])]; tensor concat_93_values1_0 = const()[name = string("concat_93_values1_0"), val = tensor([0])]; tensor concat_93_values3_0 = const()[name = string("concat_93_values3_0"), val = tensor([0])]; int32 concat_93_axis_0 = const()[name = string("concat_93_axis_0"), val = int32(0)]; bool concat_93_interleave_0 = const()[name = string("concat_93_interleave_0"), val = bool(false)]; tensor concat_93 = concat(axis = concat_93_axis_0, interleave = concat_93_interleave_0, values = (concat_93_values0_0, concat_93_values1_0, expand_dims_67, concat_93_values3_0))[name = string("concat_93")]; tensor k_cache1_internal_tensor_assign_5_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_5_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_5_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_92, begin_mask = k_cache1_internal_tensor_assign_5_begin_mask_0, end = concat_93, end_mask = k_cache1_internal_tensor_assign_5_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_5_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_5_stride_0, update = linear_33_cast_fp16, x = coreml_update_state_30)[name = string("k_cache1_internal_tensor_assign_5_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_5_cast_fp16, input = k_cache1)[name = string("coreml_update_state_32_write_state")]; tensor coreml_update_state_32 = read_state(input = k_cache1)[name = string("coreml_update_state_32")]; tensor v_cache1_internal_tensor_assign_5_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_5_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_5_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_92, begin_mask = v_cache1_internal_tensor_assign_5_begin_mask_0, end = concat_93, end_mask = v_cache1_internal_tensor_assign_5_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_5_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_5_stride_0, update = linear_34_cast_fp16, x = coreml_update_state_31)[name = string("v_cache1_internal_tensor_assign_5_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_5_cast_fp16, input = v_cache1)[name = string("coreml_update_state_33_write_state")]; tensor coreml_update_state_33 = read_state(input = v_cache1)[name = string("coreml_update_state_33")]; int32 concat_98_values0_0 = const()[name = string("concat_98_values0_0"), val = int32(1)]; int32 concat_98_values2_0 = const()[name = string("concat_98_values2_0"), val = int32(768)]; int32 concat_98_axis_0 = const()[name = string("concat_98_axis_0"), val = int32(0)]; bool concat_98_interleave_0 = const()[name = string("concat_98_interleave_0"), val = bool(false)]; tensor concat_98 = concat(axis = concat_98_axis_0, interleave = concat_98_interleave_0, values = (concat_98_values0_0, end_step_11, concat_98_values2_0))[name = string("concat_98")]; tensor var_1000_begin_0 = const()[name = string("op_1000_begin_0"), val = tensor([0, 0, 0])]; tensor var_1000_end_mask_0 = const()[name = string("op_1000_end_mask_0"), val = tensor([true, false, true])]; tensor var_1000_cast_fp16 = slice_by_index(begin = var_1000_begin_0, end = concat_98, end_mask = var_1000_end_mask_0, x = k_cache_17_cast_fp16)[name = string("op_1000_cast_fp16")]; tensor var_1003_begin_0 = const()[name = string("op_1003_begin_0"), val = tensor([0, 0, 0])]; tensor var_1003_end_mask_0 = const()[name = string("op_1003_end_mask_0"), val = tensor([true, false, true])]; tensor var_1003_cast_fp16 = slice_by_index(begin = var_1003_begin_0, end = concat_98, end_mask = var_1003_end_mask_0, x = v_cache_17_cast_fp16)[name = string("op_1003_cast_fp16")]; tensor concat_100x = const()[name = string("concat_100x"), val = tensor([1, -1, 12, 64])]; tensor var_1013_cast_fp16 = reshape(shape = concat_100x, x = linear_32_cast_fp16)[name = string("op_1013_cast_fp16")]; tensor const_76_to_fp16 = const()[name = string("const_76_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_35_cast_fp16 = mul(x = var_1013_cast_fp16, y = const_76_to_fp16)[name = string("q_35_cast_fp16")]; tensor concat_101x = const()[name = string("concat_101x"), val = tensor([1, -1, 12, 64])]; tensor var_1020_cast_fp16 = reshape(shape = concat_101x, x = var_1000_cast_fp16)[name = string("op_1020_cast_fp16")]; tensor const_77_to_fp16 = const()[name = string("const_77_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_45_cast_fp16 = mul(x = var_1020_cast_fp16, y = const_77_to_fp16)[name = string("k_45_cast_fp16")]; tensor concat_102x = const()[name = string("concat_102x"), val = tensor([1, -1, 12, 64])]; tensor var_1027_cast_fp16 = reshape(shape = concat_102x, x = var_1003_cast_fp16)[name = string("op_1027_cast_fp16")]; tensor var_1028 = const()[name = string("op_1028"), val = tensor([0, 2, 1, 3])]; bool qk_25_transpose_x_0 = const()[name = string("qk_25_transpose_x_0"), val = bool(false)]; bool qk_25_transpose_y_0 = const()[name = string("qk_25_transpose_y_0"), val = bool(false)]; tensor transpose_113_perm_0 = const()[name = string("transpose_113_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_114_perm_0 = const()[name = string("transpose_114_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_114 = transpose(perm = transpose_114_perm_0, x = k_45_cast_fp16)[name = string("transpose_206")]; tensor transpose_113 = transpose(perm = transpose_113_perm_0, x = q_35_cast_fp16)[name = string("transpose_207")]; tensor qk_25_cast_fp16 = matmul(transpose_x = qk_25_transpose_x_0, transpose_y = qk_25_transpose_y_0, x = transpose_113, y = transpose_114)[name = string("qk_25_cast_fp16")]; int32 concat_103_values1_0 = const()[name = string("concat_103_values1_0"), val = int32(448)]; int32 concat_103_axis_0 = const()[name = string("concat_103_axis_0"), val = int32(0)]; bool concat_103_interleave_0 = const()[name = string("concat_103_interleave_0"), val = bool(false)]; tensor concat_103 = concat(axis = concat_103_axis_0, interleave = concat_103_interleave_0, values = (gather_50_cast_uint16_to_int32, concat_103_values1_0))[name = string("concat_103")]; tensor var_1031_begin_0 = const()[name = string("op_1031_begin_0"), val = tensor([0, 0])]; tensor var_1031_end_mask_0 = const()[name = string("op_1031_end_mask_0"), val = tensor([false, true])]; tensor var_1031_cast_fp16 = slice_by_index(begin = var_1031_begin_0, end = concat_103, end_mask = var_1031_end_mask_0, x = mask_to_fp16)[name = string("op_1031_cast_fp16")]; int32 concat_104_values0_0 = const()[name = string("concat_104_values0_0"), val = int32(0)]; int32 concat_104_axis_0 = const()[name = string("concat_104_axis_0"), val = int32(0)]; bool concat_104_interleave_0 = const()[name = string("concat_104_interleave_0"), val = bool(false)]; tensor concat_104 = concat(axis = concat_104_axis_0, interleave = concat_104_interleave_0, values = (concat_104_values0_0, gather_50_cast_uint16_to_int32))[name = string("concat_104")]; tensor var_1032_begin_0 = const()[name = string("op_1032_begin_0"), val = tensor([0, 0])]; tensor var_1032_end_mask_0 = const()[name = string("op_1032_end_mask_0"), val = tensor([true, false])]; tensor var_1032_cast_fp16 = slice_by_index(begin = var_1032_begin_0, end = concat_104, end_mask = var_1032_end_mask_0, x = var_1031_cast_fp16)[name = string("op_1032_cast_fp16")]; tensor qk_27_cast_fp16 = add(x = qk_25_cast_fp16, y = var_1032_cast_fp16)[name = string("qk_27_cast_fp16")]; tensor var_1035_cast_fp16 = softmax(axis = var_944, x = qk_27_cast_fp16)[name = string("op_1035_cast_fp16")]; bool var_1037_transpose_x_0 = const()[name = string("op_1037_transpose_x_0"), val = bool(false)]; bool var_1037_transpose_y_0 = const()[name = string("op_1037_transpose_y_0"), val = bool(false)]; tensor v_45_cast_fp16 = transpose(perm = var_1028, x = var_1027_cast_fp16)[name = string("transpose_208")]; tensor var_1037_cast_fp16 = matmul(transpose_x = var_1037_transpose_x_0, transpose_y = var_1037_transpose_y_0, x = var_1035_cast_fp16, y = v_45_cast_fp16)[name = string("op_1037_cast_fp16")]; tensor var_1038 = const()[name = string("op_1038"), val = tensor([0, 2, 1, 3])]; tensor concat_105x = const()[name = string("concat_105x"), val = tensor([1, -1, 768])]; tensor var_1039_cast_fp16 = transpose(perm = var_1038, x = var_1037_cast_fp16)[name = string("transpose_205")]; tensor x_79_cast_fp16 = reshape(shape = concat_105x, x = var_1039_cast_fp16)[name = string("x_79_cast_fp16")]; tensor var_1043_to_fp16 = const()[name = string("op_1043_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152769600)))]; tensor var_1044_to_fp16 = const()[name = string("op_1044_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153949312)))]; tensor linear_35_cast_fp16 = linear(bias = var_1044_to_fp16, weight = var_1043_to_fp16, x = x_79_cast_fp16)[name = string("linear_35_cast_fp16")]; tensor x_81_cast_fp16 = add(x = x_75_cast_fp16, y = linear_35_cast_fp16)[name = string("x_81_cast_fp16")]; tensor var_1051_axes_0 = const()[name = string("op_1051_axes_0"), val = tensor([-1])]; tensor blocks_4_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_4_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153950912)))]; tensor blocks_4_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_4_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153952512)))]; tensor var_1051_cast_fp16 = layer_norm(axes = var_1051_axes_0, beta = blocks_4_cross_attn_ln_bias_to_fp16, epsilon = var_950_to_fp16, gamma = blocks_4_cross_attn_ln_weight_to_fp16, x = x_81_cast_fp16)[name = string("op_1051_cast_fp16")]; tensor var_1060_to_fp16 = const()[name = string("op_1060_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153954112)))]; tensor var_1061_to_fp16 = const()[name = string("op_1061_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(155133824)))]; tensor linear_36_cast_fp16 = linear(bias = var_1061_to_fp16, weight = var_1060_to_fp16, x = var_1051_cast_fp16)[name = string("linear_36_cast_fp16")]; tensor concat_106 = const()[name = string("concat_106"), val = tensor([0, 0, 0])]; tensor concat_107 = const()[name = string("concat_107"), val = tensor([0, 1500, 0])]; tensor k_47_internal_tensor_assign_1_stride_0 = const()[name = string("k_47_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_47_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_47_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_47_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_47_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_47_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_47_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_47_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_106, begin_mask = k_47_internal_tensor_assign_1_begin_mask_0, end = concat_107, end_mask = k_47_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_47_internal_tensor_assign_1_squeeze_mask_0, stride = k_47_internal_tensor_assign_1_stride_0, update = k_cache_19_cast_fp16, x = k_7_to_fp16)[name = string("k_47_internal_tensor_assign_1_cast_fp16")]; tensor concat_108 = const()[name = string("concat_108"), val = tensor([0, 0, 0])]; tensor concat_109 = const()[name = string("concat_109"), val = tensor([0, 1500, 0])]; tensor v_47_internal_tensor_assign_1_stride_0 = const()[name = string("v_47_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_47_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_47_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_47_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_47_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_47_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_47_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_47_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_108, begin_mask = v_47_internal_tensor_assign_1_begin_mask_0, end = concat_109, end_mask = v_47_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_47_internal_tensor_assign_1_squeeze_mask_0, stride = v_47_internal_tensor_assign_1_stride_0, update = v_cache_19_cast_fp16, x = k_7_to_fp16)[name = string("v_47_internal_tensor_assign_1_cast_fp16")]; tensor concat_110x = const()[name = string("concat_110x"), val = tensor([1, -1, 12, 64])]; tensor var_1081_cast_fp16 = reshape(shape = concat_110x, x = linear_36_cast_fp16)[name = string("op_1081_cast_fp16")]; tensor const_78_to_fp16 = const()[name = string("const_78_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_39_cast_fp16 = mul(x = var_1081_cast_fp16, y = const_78_to_fp16)[name = string("q_39_cast_fp16")]; tensor var_1087 = const()[name = string("op_1087"), val = tensor([1, 1500, 12, -1])]; tensor var_1088_cast_fp16 = reshape(shape = var_1087, x = k_47_internal_tensor_assign_1_cast_fp16)[name = string("op_1088_cast_fp16")]; tensor const_79_to_fp16 = const()[name = string("const_79_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_49_cast_fp16 = mul(x = var_1088_cast_fp16, y = const_79_to_fp16)[name = string("k_49_cast_fp16")]; tensor var_1094 = const()[name = string("op_1094"), val = tensor([1, 1500, 12, -1])]; tensor var_1095_cast_fp16 = reshape(shape = var_1094, x = v_47_internal_tensor_assign_1_cast_fp16)[name = string("op_1095_cast_fp16")]; tensor var_1096 = const()[name = string("op_1096"), val = tensor([0, 2, 1, 3])]; bool qk_29_transpose_x_0 = const()[name = string("qk_29_transpose_x_0"), val = bool(false)]; bool qk_29_transpose_y_0 = const()[name = string("qk_29_transpose_y_0"), val = bool(false)]; tensor transpose_115_perm_0 = const()[name = string("transpose_115_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_116_perm_0 = const()[name = string("transpose_116_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_116 = transpose(perm = transpose_116_perm_0, x = k_49_cast_fp16)[name = string("transpose_202")]; tensor transpose_115 = transpose(perm = transpose_115_perm_0, x = q_39_cast_fp16)[name = string("transpose_203")]; tensor qk_29_cast_fp16 = matmul(transpose_x = qk_29_transpose_x_0, transpose_y = qk_29_transpose_y_0, x = transpose_115, y = transpose_116)[name = string("qk_29_cast_fp16")]; tensor var_1100_cast_fp16 = softmax(axis = var_944, x = qk_29_cast_fp16)[name = string("op_1100_cast_fp16")]; bool var_1102_transpose_x_0 = const()[name = string("op_1102_transpose_x_0"), val = bool(false)]; bool var_1102_transpose_y_0 = const()[name = string("op_1102_transpose_y_0"), val = bool(false)]; tensor v_49_cast_fp16 = transpose(perm = var_1096, x = var_1095_cast_fp16)[name = string("transpose_204")]; tensor var_1102_cast_fp16 = matmul(transpose_x = var_1102_transpose_x_0, transpose_y = var_1102_transpose_y_0, x = var_1100_cast_fp16, y = v_49_cast_fp16)[name = string("op_1102_cast_fp16")]; tensor var_1103 = const()[name = string("op_1103"), val = tensor([0, 2, 1, 3])]; tensor concat_111x = const()[name = string("concat_111x"), val = tensor([1, -1, 768])]; tensor var_1104_cast_fp16 = transpose(perm = var_1103, x = var_1102_cast_fp16)[name = string("transpose_201")]; tensor x_85_cast_fp16 = reshape(shape = concat_111x, x = var_1104_cast_fp16)[name = string("x_85_cast_fp16")]; tensor var_1108_to_fp16 = const()[name = string("op_1108_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(155135424)))]; tensor var_1109_to_fp16 = const()[name = string("op_1109_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(156315136)))]; tensor linear_37_cast_fp16 = linear(bias = var_1109_to_fp16, weight = var_1108_to_fp16, x = x_85_cast_fp16)[name = string("linear_37_cast_fp16")]; tensor x_87_cast_fp16 = add(x = x_81_cast_fp16, y = linear_37_cast_fp16)[name = string("x_87_cast_fp16")]; tensor var_1116_axes_0 = const()[name = string("op_1116_axes_0"), val = tensor([-1])]; tensor blocks_4_mlp_ln_weight_to_fp16 = const()[name = string("blocks_4_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(156316736)))]; tensor blocks_4_mlp_ln_bias_to_fp16 = const()[name = string("blocks_4_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(156318336)))]; tensor var_1116_cast_fp16 = layer_norm(axes = var_1116_axes_0, beta = blocks_4_mlp_ln_bias_to_fp16, epsilon = var_950_to_fp16, gamma = blocks_4_mlp_ln_weight_to_fp16, x = x_87_cast_fp16)[name = string("op_1116_cast_fp16")]; tensor var_1125_to_fp16 = const()[name = string("op_1125_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(156319936)))]; tensor var_1126_to_fp16 = const()[name = string("op_1126_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161038592)))]; tensor linear_38_cast_fp16 = linear(bias = var_1126_to_fp16, weight = var_1125_to_fp16, x = var_1116_cast_fp16)[name = string("linear_38_cast_fp16")]; string x_91_mode_0 = const()[name = string("x_91_mode_0"), val = string("EXACT")]; tensor x_91_cast_fp16 = gelu(mode = x_91_mode_0, x = linear_38_cast_fp16)[name = string("x_91_cast_fp16")]; tensor var_1131_to_fp16 = const()[name = string("op_1131_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161044800)))]; tensor var_1132_to_fp16 = const()[name = string("op_1132_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(165763456)))]; tensor linear_39_cast_fp16 = linear(bias = var_1132_to_fp16, weight = var_1131_to_fp16, x = x_91_cast_fp16)[name = string("linear_39_cast_fp16")]; tensor x_93_cast_fp16 = add(x = x_87_cast_fp16, y = linear_39_cast_fp16)[name = string("x_93_cast_fp16")]; tensor k_cache_21_begin_0 = const()[name = string("k_cache_21_begin_0"), val = tensor([5, 0, 0, 0])]; tensor k_cache_21_end_0 = const()[name = string("k_cache_21_end_0"), val = tensor([6, 1, 448, 768])]; tensor k_cache_21_end_mask_0 = const()[name = string("k_cache_21_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_21_squeeze_mask_0 = const()[name = string("k_cache_21_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_21_cast_fp16 = slice_by_index(begin = k_cache_21_begin_0, end = k_cache_21_end_0, end_mask = k_cache_21_end_mask_0, squeeze_mask = k_cache_21_squeeze_mask_0, x = coreml_update_state_32)[name = string("k_cache_21_cast_fp16")]; tensor v_cache_21_begin_0 = const()[name = string("v_cache_21_begin_0"), val = tensor([5, 0, 0, 0])]; tensor v_cache_21_end_0 = const()[name = string("v_cache_21_end_0"), val = tensor([6, 1, 448, 768])]; tensor v_cache_21_end_mask_0 = const()[name = string("v_cache_21_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_21_squeeze_mask_0 = const()[name = string("v_cache_21_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_21_cast_fp16 = slice_by_index(begin = v_cache_21_begin_0, end = v_cache_21_end_0, end_mask = v_cache_21_end_mask_0, squeeze_mask = v_cache_21_squeeze_mask_0, x = coreml_update_state_33)[name = string("v_cache_21_cast_fp16")]; tensor k_cache_23_begin_0 = const()[name = string("k_cache_23_begin_0"), val = tensor([5, 0, 0, 0])]; tensor k_cache_23_end_0 = const()[name = string("k_cache_23_end_0"), val = tensor([6, 1, 1500, 768])]; tensor k_cache_23_end_mask_0 = const()[name = string("k_cache_23_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_23_squeeze_mask_0 = const()[name = string("k_cache_23_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_23_cast_fp16 = slice_by_index(begin = k_cache_23_begin_0, end = k_cache_23_end_0, end_mask = k_cache_23_end_mask_0, squeeze_mask = k_cache_23_squeeze_mask_0, x = read_state_2)[name = string("k_cache_23_cast_fp16")]; tensor v_cache_23_begin_0 = const()[name = string("v_cache_23_begin_0"), val = tensor([5, 0, 0, 0])]; tensor v_cache_23_end_0 = const()[name = string("v_cache_23_end_0"), val = tensor([6, 1, 1500, 768])]; tensor v_cache_23_end_mask_0 = const()[name = string("v_cache_23_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_23_squeeze_mask_0 = const()[name = string("v_cache_23_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_23_cast_fp16 = slice_by_index(begin = v_cache_23_begin_0, end = v_cache_23_end_0, end_mask = v_cache_23_end_mask_0, squeeze_mask = v_cache_23_squeeze_mask_0, x = read_state_3)[name = string("v_cache_23_cast_fp16")]; int32 var_1155 = const()[name = string("op_1155"), val = int32(-1)]; tensor var_1173_axes_0 = const()[name = string("op_1173_axes_0"), val = tensor([-1])]; tensor blocks_5_attn_ln_weight_to_fp16 = const()[name = string("blocks_5_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(165765056)))]; tensor blocks_5_attn_ln_bias_to_fp16 = const()[name = string("blocks_5_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(165766656)))]; fp16 var_1161_to_fp16 = const()[name = string("op_1161_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_1173_cast_fp16 = layer_norm(axes = var_1173_axes_0, beta = blocks_5_attn_ln_bias_to_fp16, epsilon = var_1161_to_fp16, gamma = blocks_5_attn_ln_weight_to_fp16, x = x_93_cast_fp16)[name = string("op_1173_cast_fp16")]; tensor var_1184_to_fp16 = const()[name = string("op_1184_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(165768256)))]; tensor var_1185_to_fp16 = const()[name = string("op_1185_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166947968)))]; tensor linear_40_cast_fp16 = linear(bias = var_1185_to_fp16, weight = var_1184_to_fp16, x = var_1173_cast_fp16)[name = string("linear_40_cast_fp16")]; tensor var_1188_to_fp16 = const()[name = string("op_1188_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166949568)))]; tensor linear_41_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1188_to_fp16, x = var_1173_cast_fp16)[name = string("linear_41_cast_fp16")]; tensor var_1192_to_fp16 = const()[name = string("op_1192_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168129280)))]; tensor var_1193_to_fp16 = const()[name = string("op_1193_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169308992)))]; tensor linear_42_cast_fp16 = linear(bias = var_1193_to_fp16, weight = var_1192_to_fp16, x = var_1173_cast_fp16)[name = string("linear_42_cast_fp16")]; tensor var_1195_shape_cast_fp16 = shape(x = linear_40_cast_fp16)[name = string("op_1195_shape_cast_fp16")]; int32 gather_62_axis_0 = const()[name = string("gather_62_axis_0"), val = int32(0)]; int32 gather_62_batch_dims_0 = const()[name = string("gather_62_batch_dims_0"), val = int32(0)]; bool gather_62_validate_indices_0 = const()[name = string("gather_62_validate_indices_0"), val = bool(false)]; string var_1195_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1195_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_62_to_uint16 = const()[name = string("select_62_to_uint16"), val = uint16(1)]; tensor var_1195_shape_cast_fp16_to_uint16 = cast(dtype = var_1195_shape_cast_fp16_to_uint16_dtype_0, x = var_1195_shape_cast_fp16)[name = string("cast_140")]; uint16 gather_62_cast_uint16 = gather(axis = gather_62_axis_0, batch_dims = gather_62_batch_dims_0, indices = select_62_to_uint16, validate_indices = gather_62_validate_indices_0, x = var_1195_shape_cast_fp16_to_uint16)[name = string("gather_62_cast_uint16")]; string gather_62_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_62_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_62_cast_uint16_to_int32 = cast(dtype = gather_62_cast_uint16_to_int32_dtype_0, x = gather_62_cast_uint16)[name = string("cast_139")]; int32 end_step_13 = add(x = offset, y = gather_62_cast_uint16_to_int32)[name = string("end_step_13")]; tensor expand_dims_80 = const()[name = string("expand_dims_80"), val = tensor([0])]; tensor expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor([0])]; tensor expand_dims_83_axes_0 = const()[name = string("expand_dims_83_axes_0"), val = tensor([0])]; tensor expand_dims_83 = expand_dims(axes = expand_dims_83_axes_0, x = end_step_13)[name = string("expand_dims_83")]; tensor concat_114_values0_0 = const()[name = string("concat_114_values0_0"), val = tensor([5])]; int32 concat_114_axis_0 = const()[name = string("concat_114_axis_0"), val = int32(0)]; bool concat_114_interleave_0 = const()[name = string("concat_114_interleave_0"), val = bool(false)]; tensor concat_114 = concat(axis = concat_114_axis_0, interleave = concat_114_interleave_0, values = (concat_114_values0_0, expand_dims_80, expand_dims_1, expand_dims_82))[name = string("concat_114")]; tensor concat_115_values0_0 = const()[name = string("concat_115_values0_0"), val = tensor([0])]; tensor concat_115_values1_0 = const()[name = string("concat_115_values1_0"), val = tensor([0])]; tensor concat_115_values3_0 = const()[name = string("concat_115_values3_0"), val = tensor([0])]; int32 concat_115_axis_0 = const()[name = string("concat_115_axis_0"), val = int32(0)]; bool concat_115_interleave_0 = const()[name = string("concat_115_interleave_0"), val = bool(false)]; tensor concat_115 = concat(axis = concat_115_axis_0, interleave = concat_115_interleave_0, values = (concat_115_values0_0, concat_115_values1_0, expand_dims_83, concat_115_values3_0))[name = string("concat_115")]; tensor k_cache1_internal_tensor_assign_6_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_6_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_6_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_114, begin_mask = k_cache1_internal_tensor_assign_6_begin_mask_0, end = concat_115, end_mask = k_cache1_internal_tensor_assign_6_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_6_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_6_stride_0, update = linear_41_cast_fp16, x = coreml_update_state_32)[name = string("k_cache1_internal_tensor_assign_6_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_6_cast_fp16, input = k_cache1)[name = string("coreml_update_state_34_write_state")]; tensor coreml_update_state_34 = read_state(input = k_cache1)[name = string("coreml_update_state_34")]; tensor v_cache1_internal_tensor_assign_6_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_6_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_6_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_114, begin_mask = v_cache1_internal_tensor_assign_6_begin_mask_0, end = concat_115, end_mask = v_cache1_internal_tensor_assign_6_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_6_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_6_stride_0, update = linear_42_cast_fp16, x = coreml_update_state_33)[name = string("v_cache1_internal_tensor_assign_6_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_6_cast_fp16, input = v_cache1)[name = string("coreml_update_state_35_write_state")]; tensor coreml_update_state_35 = read_state(input = v_cache1)[name = string("coreml_update_state_35")]; int32 concat_120_values0_0 = const()[name = string("concat_120_values0_0"), val = int32(1)]; int32 concat_120_values2_0 = const()[name = string("concat_120_values2_0"), val = int32(768)]; int32 concat_120_axis_0 = const()[name = string("concat_120_axis_0"), val = int32(0)]; bool concat_120_interleave_0 = const()[name = string("concat_120_interleave_0"), val = bool(false)]; tensor concat_120 = concat(axis = concat_120_axis_0, interleave = concat_120_interleave_0, values = (concat_120_values0_0, end_step_13, concat_120_values2_0))[name = string("concat_120")]; tensor var_1211_begin_0 = const()[name = string("op_1211_begin_0"), val = tensor([0, 0, 0])]; tensor var_1211_end_mask_0 = const()[name = string("op_1211_end_mask_0"), val = tensor([true, false, true])]; tensor var_1211_cast_fp16 = slice_by_index(begin = var_1211_begin_0, end = concat_120, end_mask = var_1211_end_mask_0, x = k_cache_21_cast_fp16)[name = string("op_1211_cast_fp16")]; tensor var_1214_begin_0 = const()[name = string("op_1214_begin_0"), val = tensor([0, 0, 0])]; tensor var_1214_end_mask_0 = const()[name = string("op_1214_end_mask_0"), val = tensor([true, false, true])]; tensor var_1214_cast_fp16 = slice_by_index(begin = var_1214_begin_0, end = concat_120, end_mask = var_1214_end_mask_0, x = v_cache_21_cast_fp16)[name = string("op_1214_cast_fp16")]; tensor concat_122x = const()[name = string("concat_122x"), val = tensor([1, -1, 12, 64])]; tensor var_1224_cast_fp16 = reshape(shape = concat_122x, x = linear_40_cast_fp16)[name = string("op_1224_cast_fp16")]; tensor const_80_to_fp16 = const()[name = string("const_80_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_43_cast_fp16 = mul(x = var_1224_cast_fp16, y = const_80_to_fp16)[name = string("q_43_cast_fp16")]; tensor concat_123x = const()[name = string("concat_123x"), val = tensor([1, -1, 12, 64])]; tensor var_1231_cast_fp16 = reshape(shape = concat_123x, x = var_1211_cast_fp16)[name = string("op_1231_cast_fp16")]; tensor const_81_to_fp16 = const()[name = string("const_81_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_55_cast_fp16 = mul(x = var_1231_cast_fp16, y = const_81_to_fp16)[name = string("k_55_cast_fp16")]; tensor concat_124x = const()[name = string("concat_124x"), val = tensor([1, -1, 12, 64])]; tensor var_1238_cast_fp16 = reshape(shape = concat_124x, x = var_1214_cast_fp16)[name = string("op_1238_cast_fp16")]; tensor var_1239 = const()[name = string("op_1239"), val = tensor([0, 2, 1, 3])]; bool qk_31_transpose_x_0 = const()[name = string("qk_31_transpose_x_0"), val = bool(false)]; bool qk_31_transpose_y_0 = const()[name = string("qk_31_transpose_y_0"), val = bool(false)]; tensor transpose_117_perm_0 = const()[name = string("transpose_117_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_118_perm_0 = const()[name = string("transpose_118_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_118 = transpose(perm = transpose_118_perm_0, x = k_55_cast_fp16)[name = string("transpose_198")]; tensor transpose_117 = transpose(perm = transpose_117_perm_0, x = q_43_cast_fp16)[name = string("transpose_199")]; tensor qk_31_cast_fp16 = matmul(transpose_x = qk_31_transpose_x_0, transpose_y = qk_31_transpose_y_0, x = transpose_117, y = transpose_118)[name = string("qk_31_cast_fp16")]; int32 concat_125_values1_0 = const()[name = string("concat_125_values1_0"), val = int32(448)]; int32 concat_125_axis_0 = const()[name = string("concat_125_axis_0"), val = int32(0)]; bool concat_125_interleave_0 = const()[name = string("concat_125_interleave_0"), val = bool(false)]; tensor concat_125 = concat(axis = concat_125_axis_0, interleave = concat_125_interleave_0, values = (gather_62_cast_uint16_to_int32, concat_125_values1_0))[name = string("concat_125")]; tensor var_1242_begin_0 = const()[name = string("op_1242_begin_0"), val = tensor([0, 0])]; tensor var_1242_end_mask_0 = const()[name = string("op_1242_end_mask_0"), val = tensor([false, true])]; tensor var_1242_cast_fp16 = slice_by_index(begin = var_1242_begin_0, end = concat_125, end_mask = var_1242_end_mask_0, x = mask_to_fp16)[name = string("op_1242_cast_fp16")]; int32 concat_126_values0_0 = const()[name = string("concat_126_values0_0"), val = int32(0)]; int32 concat_126_axis_0 = const()[name = string("concat_126_axis_0"), val = int32(0)]; bool concat_126_interleave_0 = const()[name = string("concat_126_interleave_0"), val = bool(false)]; tensor concat_126 = concat(axis = concat_126_axis_0, interleave = concat_126_interleave_0, values = (concat_126_values0_0, gather_62_cast_uint16_to_int32))[name = string("concat_126")]; tensor var_1243_begin_0 = const()[name = string("op_1243_begin_0"), val = tensor([0, 0])]; tensor var_1243_end_mask_0 = const()[name = string("op_1243_end_mask_0"), val = tensor([true, false])]; tensor var_1243_cast_fp16 = slice_by_index(begin = var_1243_begin_0, end = concat_126, end_mask = var_1243_end_mask_0, x = var_1242_cast_fp16)[name = string("op_1243_cast_fp16")]; tensor qk_33_cast_fp16 = add(x = qk_31_cast_fp16, y = var_1243_cast_fp16)[name = string("qk_33_cast_fp16")]; tensor var_1246_cast_fp16 = softmax(axis = var_1155, x = qk_33_cast_fp16)[name = string("op_1246_cast_fp16")]; bool var_1248_transpose_x_0 = const()[name = string("op_1248_transpose_x_0"), val = bool(false)]; bool var_1248_transpose_y_0 = const()[name = string("op_1248_transpose_y_0"), val = bool(false)]; tensor v_55_cast_fp16 = transpose(perm = var_1239, x = var_1238_cast_fp16)[name = string("transpose_200")]; tensor var_1248_cast_fp16 = matmul(transpose_x = var_1248_transpose_x_0, transpose_y = var_1248_transpose_y_0, x = var_1246_cast_fp16, y = v_55_cast_fp16)[name = string("op_1248_cast_fp16")]; tensor var_1249 = const()[name = string("op_1249"), val = tensor([0, 2, 1, 3])]; tensor concat_127x = const()[name = string("concat_127x"), val = tensor([1, -1, 768])]; tensor var_1250_cast_fp16 = transpose(perm = var_1249, x = var_1248_cast_fp16)[name = string("transpose_197")]; tensor x_97_cast_fp16 = reshape(shape = concat_127x, x = var_1250_cast_fp16)[name = string("x_97_cast_fp16")]; tensor var_1254_to_fp16 = const()[name = string("op_1254_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169310592)))]; tensor var_1255_to_fp16 = const()[name = string("op_1255_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170490304)))]; tensor linear_43_cast_fp16 = linear(bias = var_1255_to_fp16, weight = var_1254_to_fp16, x = x_97_cast_fp16)[name = string("linear_43_cast_fp16")]; tensor x_99_cast_fp16 = add(x = x_93_cast_fp16, y = linear_43_cast_fp16)[name = string("x_99_cast_fp16")]; tensor var_1262_axes_0 = const()[name = string("op_1262_axes_0"), val = tensor([-1])]; tensor blocks_5_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_5_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170491904)))]; tensor blocks_5_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_5_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170493504)))]; tensor var_1262_cast_fp16 = layer_norm(axes = var_1262_axes_0, beta = blocks_5_cross_attn_ln_bias_to_fp16, epsilon = var_1161_to_fp16, gamma = blocks_5_cross_attn_ln_weight_to_fp16, x = x_99_cast_fp16)[name = string("op_1262_cast_fp16")]; tensor var_1271_to_fp16 = const()[name = string("op_1271_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170495104)))]; tensor var_1272_to_fp16 = const()[name = string("op_1272_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171674816)))]; tensor linear_44_cast_fp16 = linear(bias = var_1272_to_fp16, weight = var_1271_to_fp16, x = var_1262_cast_fp16)[name = string("linear_44_cast_fp16")]; tensor concat_128 = const()[name = string("concat_128"), val = tensor([0, 0, 0])]; tensor concat_129 = const()[name = string("concat_129"), val = tensor([0, 1500, 0])]; tensor k_57_internal_tensor_assign_1_stride_0 = const()[name = string("k_57_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_57_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_57_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_57_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_57_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_57_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_57_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_57_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_128, begin_mask = k_57_internal_tensor_assign_1_begin_mask_0, end = concat_129, end_mask = k_57_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_57_internal_tensor_assign_1_squeeze_mask_0, stride = k_57_internal_tensor_assign_1_stride_0, update = k_cache_23_cast_fp16, x = k_7_to_fp16)[name = string("k_57_internal_tensor_assign_1_cast_fp16")]; tensor concat_130 = const()[name = string("concat_130"), val = tensor([0, 0, 0])]; tensor concat_131 = const()[name = string("concat_131"), val = tensor([0, 1500, 0])]; tensor v_57_internal_tensor_assign_1_stride_0 = const()[name = string("v_57_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_57_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_57_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_57_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_57_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_57_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_57_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_57_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_130, begin_mask = v_57_internal_tensor_assign_1_begin_mask_0, end = concat_131, end_mask = v_57_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_57_internal_tensor_assign_1_squeeze_mask_0, stride = v_57_internal_tensor_assign_1_stride_0, update = v_cache_23_cast_fp16, x = k_7_to_fp16)[name = string("v_57_internal_tensor_assign_1_cast_fp16")]; tensor concat_132x = const()[name = string("concat_132x"), val = tensor([1, -1, 12, 64])]; tensor var_1292_cast_fp16 = reshape(shape = concat_132x, x = linear_44_cast_fp16)[name = string("op_1292_cast_fp16")]; tensor const_82_to_fp16 = const()[name = string("const_82_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_47_cast_fp16 = mul(x = var_1292_cast_fp16, y = const_82_to_fp16)[name = string("q_47_cast_fp16")]; tensor var_1298 = const()[name = string("op_1298"), val = tensor([1, 1500, 12, -1])]; tensor var_1299_cast_fp16 = reshape(shape = var_1298, x = k_57_internal_tensor_assign_1_cast_fp16)[name = string("op_1299_cast_fp16")]; tensor const_83_to_fp16 = const()[name = string("const_83_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_59_cast_fp16 = mul(x = var_1299_cast_fp16, y = const_83_to_fp16)[name = string("k_59_cast_fp16")]; tensor var_1305 = const()[name = string("op_1305"), val = tensor([1, 1500, 12, -1])]; tensor var_1306_cast_fp16 = reshape(shape = var_1305, x = v_57_internal_tensor_assign_1_cast_fp16)[name = string("op_1306_cast_fp16")]; tensor var_1307 = const()[name = string("op_1307"), val = tensor([0, 2, 1, 3])]; bool qk_35_transpose_x_0 = const()[name = string("qk_35_transpose_x_0"), val = bool(false)]; bool qk_35_transpose_y_0 = const()[name = string("qk_35_transpose_y_0"), val = bool(false)]; tensor transpose_119_perm_0 = const()[name = string("transpose_119_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_120_perm_0 = const()[name = string("transpose_120_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_120 = transpose(perm = transpose_120_perm_0, x = k_59_cast_fp16)[name = string("transpose_194")]; tensor transpose_119 = transpose(perm = transpose_119_perm_0, x = q_47_cast_fp16)[name = string("transpose_195")]; tensor qk_35_cast_fp16 = matmul(transpose_x = qk_35_transpose_x_0, transpose_y = qk_35_transpose_y_0, x = transpose_119, y = transpose_120)[name = string("qk_35_cast_fp16")]; tensor var_1311_cast_fp16 = softmax(axis = var_1155, x = qk_35_cast_fp16)[name = string("op_1311_cast_fp16")]; bool var_1313_transpose_x_0 = const()[name = string("op_1313_transpose_x_0"), val = bool(false)]; bool var_1313_transpose_y_0 = const()[name = string("op_1313_transpose_y_0"), val = bool(false)]; tensor v_59_cast_fp16 = transpose(perm = var_1307, x = var_1306_cast_fp16)[name = string("transpose_196")]; tensor var_1313_cast_fp16 = matmul(transpose_x = var_1313_transpose_x_0, transpose_y = var_1313_transpose_y_0, x = var_1311_cast_fp16, y = v_59_cast_fp16)[name = string("op_1313_cast_fp16")]; tensor var_1314 = const()[name = string("op_1314"), val = tensor([0, 2, 1, 3])]; tensor concat_133x = const()[name = string("concat_133x"), val = tensor([1, -1, 768])]; tensor var_1315_cast_fp16 = transpose(perm = var_1314, x = var_1313_cast_fp16)[name = string("transpose_193")]; tensor x_103_cast_fp16 = reshape(shape = concat_133x, x = var_1315_cast_fp16)[name = string("x_103_cast_fp16")]; tensor var_1319_to_fp16 = const()[name = string("op_1319_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171676416)))]; tensor var_1320_to_fp16 = const()[name = string("op_1320_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(172856128)))]; tensor linear_45_cast_fp16 = linear(bias = var_1320_to_fp16, weight = var_1319_to_fp16, x = x_103_cast_fp16)[name = string("linear_45_cast_fp16")]; tensor x_105_cast_fp16 = add(x = x_99_cast_fp16, y = linear_45_cast_fp16)[name = string("x_105_cast_fp16")]; tensor var_1327_axes_0 = const()[name = string("op_1327_axes_0"), val = tensor([-1])]; tensor blocks_5_mlp_ln_weight_to_fp16 = const()[name = string("blocks_5_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(172857728)))]; tensor blocks_5_mlp_ln_bias_to_fp16 = const()[name = string("blocks_5_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(172859328)))]; tensor var_1327_cast_fp16 = layer_norm(axes = var_1327_axes_0, beta = blocks_5_mlp_ln_bias_to_fp16, epsilon = var_1161_to_fp16, gamma = blocks_5_mlp_ln_weight_to_fp16, x = x_105_cast_fp16)[name = string("op_1327_cast_fp16")]; tensor var_1336_to_fp16 = const()[name = string("op_1336_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(172860928)))]; tensor var_1337_to_fp16 = const()[name = string("op_1337_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177579584)))]; tensor linear_46_cast_fp16 = linear(bias = var_1337_to_fp16, weight = var_1336_to_fp16, x = var_1327_cast_fp16)[name = string("linear_46_cast_fp16")]; string x_109_mode_0 = const()[name = string("x_109_mode_0"), val = string("EXACT")]; tensor x_109_cast_fp16 = gelu(mode = x_109_mode_0, x = linear_46_cast_fp16)[name = string("x_109_cast_fp16")]; tensor var_1342_to_fp16 = const()[name = string("op_1342_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177585792)))]; tensor var_1343_to_fp16 = const()[name = string("op_1343_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182304448)))]; tensor linear_47_cast_fp16 = linear(bias = var_1343_to_fp16, weight = var_1342_to_fp16, x = x_109_cast_fp16)[name = string("linear_47_cast_fp16")]; tensor x_111_cast_fp16 = add(x = x_105_cast_fp16, y = linear_47_cast_fp16)[name = string("x_111_cast_fp16")]; tensor k_cache_25_begin_0 = const()[name = string("k_cache_25_begin_0"), val = tensor([6, 0, 0, 0])]; tensor k_cache_25_end_0 = const()[name = string("k_cache_25_end_0"), val = tensor([7, 1, 448, 768])]; tensor k_cache_25_end_mask_0 = const()[name = string("k_cache_25_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_25_squeeze_mask_0 = const()[name = string("k_cache_25_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_25_cast_fp16 = slice_by_index(begin = k_cache_25_begin_0, end = k_cache_25_end_0, end_mask = k_cache_25_end_mask_0, squeeze_mask = k_cache_25_squeeze_mask_0, x = coreml_update_state_34)[name = string("k_cache_25_cast_fp16")]; tensor v_cache_25_begin_0 = const()[name = string("v_cache_25_begin_0"), val = tensor([6, 0, 0, 0])]; tensor v_cache_25_end_0 = const()[name = string("v_cache_25_end_0"), val = tensor([7, 1, 448, 768])]; tensor v_cache_25_end_mask_0 = const()[name = string("v_cache_25_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_25_squeeze_mask_0 = const()[name = string("v_cache_25_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_25_cast_fp16 = slice_by_index(begin = v_cache_25_begin_0, end = v_cache_25_end_0, end_mask = v_cache_25_end_mask_0, squeeze_mask = v_cache_25_squeeze_mask_0, x = coreml_update_state_35)[name = string("v_cache_25_cast_fp16")]; tensor k_cache_27_begin_0 = const()[name = string("k_cache_27_begin_0"), val = tensor([6, 0, 0, 0])]; tensor k_cache_27_end_0 = const()[name = string("k_cache_27_end_0"), val = tensor([7, 1, 1500, 768])]; tensor k_cache_27_end_mask_0 = const()[name = string("k_cache_27_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_27_squeeze_mask_0 = const()[name = string("k_cache_27_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_27_cast_fp16 = slice_by_index(begin = k_cache_27_begin_0, end = k_cache_27_end_0, end_mask = k_cache_27_end_mask_0, squeeze_mask = k_cache_27_squeeze_mask_0, x = read_state_2)[name = string("k_cache_27_cast_fp16")]; tensor v_cache_27_begin_0 = const()[name = string("v_cache_27_begin_0"), val = tensor([6, 0, 0, 0])]; tensor v_cache_27_end_0 = const()[name = string("v_cache_27_end_0"), val = tensor([7, 1, 1500, 768])]; tensor v_cache_27_end_mask_0 = const()[name = string("v_cache_27_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_27_squeeze_mask_0 = const()[name = string("v_cache_27_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_27_cast_fp16 = slice_by_index(begin = v_cache_27_begin_0, end = v_cache_27_end_0, end_mask = v_cache_27_end_mask_0, squeeze_mask = v_cache_27_squeeze_mask_0, x = read_state_3)[name = string("v_cache_27_cast_fp16")]; int32 var_1366 = const()[name = string("op_1366"), val = int32(-1)]; tensor var_1384_axes_0 = const()[name = string("op_1384_axes_0"), val = tensor([-1])]; tensor blocks_6_attn_ln_weight_to_fp16 = const()[name = string("blocks_6_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182306048)))]; tensor blocks_6_attn_ln_bias_to_fp16 = const()[name = string("blocks_6_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182307648)))]; fp16 var_1372_to_fp16 = const()[name = string("op_1372_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_1384_cast_fp16 = layer_norm(axes = var_1384_axes_0, beta = blocks_6_attn_ln_bias_to_fp16, epsilon = var_1372_to_fp16, gamma = blocks_6_attn_ln_weight_to_fp16, x = x_111_cast_fp16)[name = string("op_1384_cast_fp16")]; tensor var_1395_to_fp16 = const()[name = string("op_1395_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182309248)))]; tensor var_1396_to_fp16 = const()[name = string("op_1396_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(183488960)))]; tensor linear_48_cast_fp16 = linear(bias = var_1396_to_fp16, weight = var_1395_to_fp16, x = var_1384_cast_fp16)[name = string("linear_48_cast_fp16")]; tensor var_1399_to_fp16 = const()[name = string("op_1399_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(183490560)))]; tensor linear_49_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1399_to_fp16, x = var_1384_cast_fp16)[name = string("linear_49_cast_fp16")]; tensor var_1403_to_fp16 = const()[name = string("op_1403_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184670272)))]; tensor var_1404_to_fp16 = const()[name = string("op_1404_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185849984)))]; tensor linear_50_cast_fp16 = linear(bias = var_1404_to_fp16, weight = var_1403_to_fp16, x = var_1384_cast_fp16)[name = string("linear_50_cast_fp16")]; tensor var_1406_shape_cast_fp16 = shape(x = linear_48_cast_fp16)[name = string("op_1406_shape_cast_fp16")]; int32 gather_74_axis_0 = const()[name = string("gather_74_axis_0"), val = int32(0)]; int32 gather_74_batch_dims_0 = const()[name = string("gather_74_batch_dims_0"), val = int32(0)]; bool gather_74_validate_indices_0 = const()[name = string("gather_74_validate_indices_0"), val = bool(false)]; string var_1406_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1406_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_74_to_uint16 = const()[name = string("select_74_to_uint16"), val = uint16(1)]; tensor var_1406_shape_cast_fp16_to_uint16 = cast(dtype = var_1406_shape_cast_fp16_to_uint16_dtype_0, x = var_1406_shape_cast_fp16)[name = string("cast_138")]; uint16 gather_74_cast_uint16 = gather(axis = gather_74_axis_0, batch_dims = gather_74_batch_dims_0, indices = select_74_to_uint16, validate_indices = gather_74_validate_indices_0, x = var_1406_shape_cast_fp16_to_uint16)[name = string("gather_74_cast_uint16")]; string gather_74_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_74_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_74_cast_uint16_to_int32 = cast(dtype = gather_74_cast_uint16_to_int32_dtype_0, x = gather_74_cast_uint16)[name = string("cast_137")]; int32 end_step_15 = add(x = offset, y = gather_74_cast_uint16_to_int32)[name = string("end_step_15")]; tensor expand_dims_96 = const()[name = string("expand_dims_96"), val = tensor([0])]; tensor expand_dims_98 = const()[name = string("expand_dims_98"), val = tensor([0])]; tensor expand_dims_99_axes_0 = const()[name = string("expand_dims_99_axes_0"), val = tensor([0])]; tensor expand_dims_99 = expand_dims(axes = expand_dims_99_axes_0, x = end_step_15)[name = string("expand_dims_99")]; tensor concat_136_values0_0 = const()[name = string("concat_136_values0_0"), val = tensor([6])]; int32 concat_136_axis_0 = const()[name = string("concat_136_axis_0"), val = int32(0)]; bool concat_136_interleave_0 = const()[name = string("concat_136_interleave_0"), val = bool(false)]; tensor concat_136 = concat(axis = concat_136_axis_0, interleave = concat_136_interleave_0, values = (concat_136_values0_0, expand_dims_96, expand_dims_1, expand_dims_98))[name = string("concat_136")]; tensor concat_137_values0_0 = const()[name = string("concat_137_values0_0"), val = tensor([0])]; tensor concat_137_values1_0 = const()[name = string("concat_137_values1_0"), val = tensor([0])]; tensor concat_137_values3_0 = const()[name = string("concat_137_values3_0"), val = tensor([0])]; int32 concat_137_axis_0 = const()[name = string("concat_137_axis_0"), val = int32(0)]; bool concat_137_interleave_0 = const()[name = string("concat_137_interleave_0"), val = bool(false)]; tensor concat_137 = concat(axis = concat_137_axis_0, interleave = concat_137_interleave_0, values = (concat_137_values0_0, concat_137_values1_0, expand_dims_99, concat_137_values3_0))[name = string("concat_137")]; tensor k_cache1_internal_tensor_assign_7_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_7_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_7_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_136, begin_mask = k_cache1_internal_tensor_assign_7_begin_mask_0, end = concat_137, end_mask = k_cache1_internal_tensor_assign_7_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_7_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_7_stride_0, update = linear_49_cast_fp16, x = coreml_update_state_34)[name = string("k_cache1_internal_tensor_assign_7_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_7_cast_fp16, input = k_cache1)[name = string("coreml_update_state_36_write_state")]; tensor coreml_update_state_36 = read_state(input = k_cache1)[name = string("coreml_update_state_36")]; tensor v_cache1_internal_tensor_assign_7_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_7_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_7_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_136, begin_mask = v_cache1_internal_tensor_assign_7_begin_mask_0, end = concat_137, end_mask = v_cache1_internal_tensor_assign_7_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_7_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_7_stride_0, update = linear_50_cast_fp16, x = coreml_update_state_35)[name = string("v_cache1_internal_tensor_assign_7_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_7_cast_fp16, input = v_cache1)[name = string("coreml_update_state_37_write_state")]; tensor coreml_update_state_37 = read_state(input = v_cache1)[name = string("coreml_update_state_37")]; int32 concat_142_values0_0 = const()[name = string("concat_142_values0_0"), val = int32(1)]; int32 concat_142_values2_0 = const()[name = string("concat_142_values2_0"), val = int32(768)]; int32 concat_142_axis_0 = const()[name = string("concat_142_axis_0"), val = int32(0)]; bool concat_142_interleave_0 = const()[name = string("concat_142_interleave_0"), val = bool(false)]; tensor concat_142 = concat(axis = concat_142_axis_0, interleave = concat_142_interleave_0, values = (concat_142_values0_0, end_step_15, concat_142_values2_0))[name = string("concat_142")]; tensor var_1422_begin_0 = const()[name = string("op_1422_begin_0"), val = tensor([0, 0, 0])]; tensor var_1422_end_mask_0 = const()[name = string("op_1422_end_mask_0"), val = tensor([true, false, true])]; tensor var_1422_cast_fp16 = slice_by_index(begin = var_1422_begin_0, end = concat_142, end_mask = var_1422_end_mask_0, x = k_cache_25_cast_fp16)[name = string("op_1422_cast_fp16")]; tensor var_1425_begin_0 = const()[name = string("op_1425_begin_0"), val = tensor([0, 0, 0])]; tensor var_1425_end_mask_0 = const()[name = string("op_1425_end_mask_0"), val = tensor([true, false, true])]; tensor var_1425_cast_fp16 = slice_by_index(begin = var_1425_begin_0, end = concat_142, end_mask = var_1425_end_mask_0, x = v_cache_25_cast_fp16)[name = string("op_1425_cast_fp16")]; tensor concat_144x = const()[name = string("concat_144x"), val = tensor([1, -1, 12, 64])]; tensor var_1435_cast_fp16 = reshape(shape = concat_144x, x = linear_48_cast_fp16)[name = string("op_1435_cast_fp16")]; tensor const_84_to_fp16 = const()[name = string("const_84_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_51_cast_fp16 = mul(x = var_1435_cast_fp16, y = const_84_to_fp16)[name = string("q_51_cast_fp16")]; tensor concat_145x = const()[name = string("concat_145x"), val = tensor([1, -1, 12, 64])]; tensor var_1442_cast_fp16 = reshape(shape = concat_145x, x = var_1422_cast_fp16)[name = string("op_1442_cast_fp16")]; tensor const_85_to_fp16 = const()[name = string("const_85_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_65_cast_fp16 = mul(x = var_1442_cast_fp16, y = const_85_to_fp16)[name = string("k_65_cast_fp16")]; tensor concat_146x = const()[name = string("concat_146x"), val = tensor([1, -1, 12, 64])]; tensor var_1449_cast_fp16 = reshape(shape = concat_146x, x = var_1425_cast_fp16)[name = string("op_1449_cast_fp16")]; tensor var_1450 = const()[name = string("op_1450"), val = tensor([0, 2, 1, 3])]; bool qk_37_transpose_x_0 = const()[name = string("qk_37_transpose_x_0"), val = bool(false)]; bool qk_37_transpose_y_0 = const()[name = string("qk_37_transpose_y_0"), val = bool(false)]; tensor transpose_121_perm_0 = const()[name = string("transpose_121_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_122_perm_0 = const()[name = string("transpose_122_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_122 = transpose(perm = transpose_122_perm_0, x = k_65_cast_fp16)[name = string("transpose_190")]; tensor transpose_121 = transpose(perm = transpose_121_perm_0, x = q_51_cast_fp16)[name = string("transpose_191")]; tensor qk_37_cast_fp16 = matmul(transpose_x = qk_37_transpose_x_0, transpose_y = qk_37_transpose_y_0, x = transpose_121, y = transpose_122)[name = string("qk_37_cast_fp16")]; int32 concat_147_values1_0 = const()[name = string("concat_147_values1_0"), val = int32(448)]; int32 concat_147_axis_0 = const()[name = string("concat_147_axis_0"), val = int32(0)]; bool concat_147_interleave_0 = const()[name = string("concat_147_interleave_0"), val = bool(false)]; tensor concat_147 = concat(axis = concat_147_axis_0, interleave = concat_147_interleave_0, values = (gather_74_cast_uint16_to_int32, concat_147_values1_0))[name = string("concat_147")]; tensor var_1453_begin_0 = const()[name = string("op_1453_begin_0"), val = tensor([0, 0])]; tensor var_1453_end_mask_0 = const()[name = string("op_1453_end_mask_0"), val = tensor([false, true])]; tensor var_1453_cast_fp16 = slice_by_index(begin = var_1453_begin_0, end = concat_147, end_mask = var_1453_end_mask_0, x = mask_to_fp16)[name = string("op_1453_cast_fp16")]; int32 concat_148_values0_0 = const()[name = string("concat_148_values0_0"), val = int32(0)]; int32 concat_148_axis_0 = const()[name = string("concat_148_axis_0"), val = int32(0)]; bool concat_148_interleave_0 = const()[name = string("concat_148_interleave_0"), val = bool(false)]; tensor concat_148 = concat(axis = concat_148_axis_0, interleave = concat_148_interleave_0, values = (concat_148_values0_0, gather_74_cast_uint16_to_int32))[name = string("concat_148")]; tensor var_1454_begin_0 = const()[name = string("op_1454_begin_0"), val = tensor([0, 0])]; tensor var_1454_end_mask_0 = const()[name = string("op_1454_end_mask_0"), val = tensor([true, false])]; tensor var_1454_cast_fp16 = slice_by_index(begin = var_1454_begin_0, end = concat_148, end_mask = var_1454_end_mask_0, x = var_1453_cast_fp16)[name = string("op_1454_cast_fp16")]; tensor qk_39_cast_fp16 = add(x = qk_37_cast_fp16, y = var_1454_cast_fp16)[name = string("qk_39_cast_fp16")]; tensor var_1457_cast_fp16 = softmax(axis = var_1366, x = qk_39_cast_fp16)[name = string("op_1457_cast_fp16")]; bool var_1459_transpose_x_0 = const()[name = string("op_1459_transpose_x_0"), val = bool(false)]; bool var_1459_transpose_y_0 = const()[name = string("op_1459_transpose_y_0"), val = bool(false)]; tensor v_65_cast_fp16 = transpose(perm = var_1450, x = var_1449_cast_fp16)[name = string("transpose_192")]; tensor var_1459_cast_fp16 = matmul(transpose_x = var_1459_transpose_x_0, transpose_y = var_1459_transpose_y_0, x = var_1457_cast_fp16, y = v_65_cast_fp16)[name = string("op_1459_cast_fp16")]; tensor var_1460 = const()[name = string("op_1460"), val = tensor([0, 2, 1, 3])]; tensor concat_149x = const()[name = string("concat_149x"), val = tensor([1, -1, 768])]; tensor var_1461_cast_fp16 = transpose(perm = var_1460, x = var_1459_cast_fp16)[name = string("transpose_189")]; tensor x_115_cast_fp16 = reshape(shape = concat_149x, x = var_1461_cast_fp16)[name = string("x_115_cast_fp16")]; tensor var_1465_to_fp16 = const()[name = string("op_1465_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185851584)))]; tensor var_1466_to_fp16 = const()[name = string("op_1466_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187031296)))]; tensor linear_51_cast_fp16 = linear(bias = var_1466_to_fp16, weight = var_1465_to_fp16, x = x_115_cast_fp16)[name = string("linear_51_cast_fp16")]; tensor x_117_cast_fp16 = add(x = x_111_cast_fp16, y = linear_51_cast_fp16)[name = string("x_117_cast_fp16")]; tensor var_1473_axes_0 = const()[name = string("op_1473_axes_0"), val = tensor([-1])]; tensor blocks_6_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_6_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187032896)))]; tensor blocks_6_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_6_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187034496)))]; tensor var_1473_cast_fp16 = layer_norm(axes = var_1473_axes_0, beta = blocks_6_cross_attn_ln_bias_to_fp16, epsilon = var_1372_to_fp16, gamma = blocks_6_cross_attn_ln_weight_to_fp16, x = x_117_cast_fp16)[name = string("op_1473_cast_fp16")]; tensor var_1482_to_fp16 = const()[name = string("op_1482_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187036096)))]; tensor var_1483_to_fp16 = const()[name = string("op_1483_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188215808)))]; tensor linear_52_cast_fp16 = linear(bias = var_1483_to_fp16, weight = var_1482_to_fp16, x = var_1473_cast_fp16)[name = string("linear_52_cast_fp16")]; tensor concat_150 = const()[name = string("concat_150"), val = tensor([0, 0, 0])]; tensor concat_151 = const()[name = string("concat_151"), val = tensor([0, 1500, 0])]; tensor k_67_internal_tensor_assign_1_stride_0 = const()[name = string("k_67_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_67_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_67_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_67_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_67_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_67_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_67_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_67_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_150, begin_mask = k_67_internal_tensor_assign_1_begin_mask_0, end = concat_151, end_mask = k_67_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_67_internal_tensor_assign_1_squeeze_mask_0, stride = k_67_internal_tensor_assign_1_stride_0, update = k_cache_27_cast_fp16, x = k_7_to_fp16)[name = string("k_67_internal_tensor_assign_1_cast_fp16")]; tensor concat_152 = const()[name = string("concat_152"), val = tensor([0, 0, 0])]; tensor concat_153 = const()[name = string("concat_153"), val = tensor([0, 1500, 0])]; tensor v_67_internal_tensor_assign_1_stride_0 = const()[name = string("v_67_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_67_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_67_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_67_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_67_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_67_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_67_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_67_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_152, begin_mask = v_67_internal_tensor_assign_1_begin_mask_0, end = concat_153, end_mask = v_67_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_67_internal_tensor_assign_1_squeeze_mask_0, stride = v_67_internal_tensor_assign_1_stride_0, update = v_cache_27_cast_fp16, x = k_7_to_fp16)[name = string("v_67_internal_tensor_assign_1_cast_fp16")]; tensor concat_154x = const()[name = string("concat_154x"), val = tensor([1, -1, 12, 64])]; tensor var_1503_cast_fp16 = reshape(shape = concat_154x, x = linear_52_cast_fp16)[name = string("op_1503_cast_fp16")]; tensor const_86_to_fp16 = const()[name = string("const_86_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_55_cast_fp16 = mul(x = var_1503_cast_fp16, y = const_86_to_fp16)[name = string("q_55_cast_fp16")]; tensor var_1509 = const()[name = string("op_1509"), val = tensor([1, 1500, 12, -1])]; tensor var_1510_cast_fp16 = reshape(shape = var_1509, x = k_67_internal_tensor_assign_1_cast_fp16)[name = string("op_1510_cast_fp16")]; tensor const_87_to_fp16 = const()[name = string("const_87_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_69_cast_fp16 = mul(x = var_1510_cast_fp16, y = const_87_to_fp16)[name = string("k_69_cast_fp16")]; tensor var_1516 = const()[name = string("op_1516"), val = tensor([1, 1500, 12, -1])]; tensor var_1517_cast_fp16 = reshape(shape = var_1516, x = v_67_internal_tensor_assign_1_cast_fp16)[name = string("op_1517_cast_fp16")]; tensor var_1518 = const()[name = string("op_1518"), val = tensor([0, 2, 1, 3])]; bool qk_41_transpose_x_0 = const()[name = string("qk_41_transpose_x_0"), val = bool(false)]; bool qk_41_transpose_y_0 = const()[name = string("qk_41_transpose_y_0"), val = bool(false)]; tensor transpose_123_perm_0 = const()[name = string("transpose_123_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_124_perm_0 = const()[name = string("transpose_124_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_124 = transpose(perm = transpose_124_perm_0, x = k_69_cast_fp16)[name = string("transpose_186")]; tensor transpose_123 = transpose(perm = transpose_123_perm_0, x = q_55_cast_fp16)[name = string("transpose_187")]; tensor qk_41_cast_fp16 = matmul(transpose_x = qk_41_transpose_x_0, transpose_y = qk_41_transpose_y_0, x = transpose_123, y = transpose_124)[name = string("qk_41_cast_fp16")]; tensor var_1522_cast_fp16 = softmax(axis = var_1366, x = qk_41_cast_fp16)[name = string("op_1522_cast_fp16")]; bool var_1524_transpose_x_0 = const()[name = string("op_1524_transpose_x_0"), val = bool(false)]; bool var_1524_transpose_y_0 = const()[name = string("op_1524_transpose_y_0"), val = bool(false)]; tensor v_69_cast_fp16 = transpose(perm = var_1518, x = var_1517_cast_fp16)[name = string("transpose_188")]; tensor var_1524_cast_fp16 = matmul(transpose_x = var_1524_transpose_x_0, transpose_y = var_1524_transpose_y_0, x = var_1522_cast_fp16, y = v_69_cast_fp16)[name = string("op_1524_cast_fp16")]; tensor var_1525 = const()[name = string("op_1525"), val = tensor([0, 2, 1, 3])]; tensor concat_155x = const()[name = string("concat_155x"), val = tensor([1, -1, 768])]; tensor var_1526_cast_fp16 = transpose(perm = var_1525, x = var_1524_cast_fp16)[name = string("transpose_185")]; tensor x_121_cast_fp16 = reshape(shape = concat_155x, x = var_1526_cast_fp16)[name = string("x_121_cast_fp16")]; tensor var_1530_to_fp16 = const()[name = string("op_1530_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188217408)))]; tensor var_1531_to_fp16 = const()[name = string("op_1531_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(189397120)))]; tensor linear_53_cast_fp16 = linear(bias = var_1531_to_fp16, weight = var_1530_to_fp16, x = x_121_cast_fp16)[name = string("linear_53_cast_fp16")]; tensor x_123_cast_fp16 = add(x = x_117_cast_fp16, y = linear_53_cast_fp16)[name = string("x_123_cast_fp16")]; tensor var_1538_axes_0 = const()[name = string("op_1538_axes_0"), val = tensor([-1])]; tensor blocks_6_mlp_ln_weight_to_fp16 = const()[name = string("blocks_6_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(189398720)))]; tensor blocks_6_mlp_ln_bias_to_fp16 = const()[name = string("blocks_6_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(189400320)))]; tensor var_1538_cast_fp16 = layer_norm(axes = var_1538_axes_0, beta = blocks_6_mlp_ln_bias_to_fp16, epsilon = var_1372_to_fp16, gamma = blocks_6_mlp_ln_weight_to_fp16, x = x_123_cast_fp16)[name = string("op_1538_cast_fp16")]; tensor var_1547_to_fp16 = const()[name = string("op_1547_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(189401920)))]; tensor var_1548_to_fp16 = const()[name = string("op_1548_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(194120576)))]; tensor linear_54_cast_fp16 = linear(bias = var_1548_to_fp16, weight = var_1547_to_fp16, x = var_1538_cast_fp16)[name = string("linear_54_cast_fp16")]; string x_127_mode_0 = const()[name = string("x_127_mode_0"), val = string("EXACT")]; tensor x_127_cast_fp16 = gelu(mode = x_127_mode_0, x = linear_54_cast_fp16)[name = string("x_127_cast_fp16")]; tensor var_1553_to_fp16 = const()[name = string("op_1553_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(194126784)))]; tensor var_1554_to_fp16 = const()[name = string("op_1554_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198845440)))]; tensor linear_55_cast_fp16 = linear(bias = var_1554_to_fp16, weight = var_1553_to_fp16, x = x_127_cast_fp16)[name = string("linear_55_cast_fp16")]; tensor x_129_cast_fp16 = add(x = x_123_cast_fp16, y = linear_55_cast_fp16)[name = string("x_129_cast_fp16")]; tensor k_cache_29_begin_0 = const()[name = string("k_cache_29_begin_0"), val = tensor([7, 0, 0, 0])]; tensor k_cache_29_end_0 = const()[name = string("k_cache_29_end_0"), val = tensor([8, 1, 448, 768])]; tensor k_cache_29_end_mask_0 = const()[name = string("k_cache_29_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_29_squeeze_mask_0 = const()[name = string("k_cache_29_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_29_cast_fp16 = slice_by_index(begin = k_cache_29_begin_0, end = k_cache_29_end_0, end_mask = k_cache_29_end_mask_0, squeeze_mask = k_cache_29_squeeze_mask_0, x = coreml_update_state_36)[name = string("k_cache_29_cast_fp16")]; tensor v_cache_29_begin_0 = const()[name = string("v_cache_29_begin_0"), val = tensor([7, 0, 0, 0])]; tensor v_cache_29_end_0 = const()[name = string("v_cache_29_end_0"), val = tensor([8, 1, 448, 768])]; tensor v_cache_29_end_mask_0 = const()[name = string("v_cache_29_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_29_squeeze_mask_0 = const()[name = string("v_cache_29_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_29_cast_fp16 = slice_by_index(begin = v_cache_29_begin_0, end = v_cache_29_end_0, end_mask = v_cache_29_end_mask_0, squeeze_mask = v_cache_29_squeeze_mask_0, x = coreml_update_state_37)[name = string("v_cache_29_cast_fp16")]; tensor k_cache_31_begin_0 = const()[name = string("k_cache_31_begin_0"), val = tensor([7, 0, 0, 0])]; tensor k_cache_31_end_0 = const()[name = string("k_cache_31_end_0"), val = tensor([8, 1, 1500, 768])]; tensor k_cache_31_end_mask_0 = const()[name = string("k_cache_31_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_31_squeeze_mask_0 = const()[name = string("k_cache_31_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_31_cast_fp16 = slice_by_index(begin = k_cache_31_begin_0, end = k_cache_31_end_0, end_mask = k_cache_31_end_mask_0, squeeze_mask = k_cache_31_squeeze_mask_0, x = read_state_2)[name = string("k_cache_31_cast_fp16")]; tensor v_cache_31_begin_0 = const()[name = string("v_cache_31_begin_0"), val = tensor([7, 0, 0, 0])]; tensor v_cache_31_end_0 = const()[name = string("v_cache_31_end_0"), val = tensor([8, 1, 1500, 768])]; tensor v_cache_31_end_mask_0 = const()[name = string("v_cache_31_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_31_squeeze_mask_0 = const()[name = string("v_cache_31_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_31_cast_fp16 = slice_by_index(begin = v_cache_31_begin_0, end = v_cache_31_end_0, end_mask = v_cache_31_end_mask_0, squeeze_mask = v_cache_31_squeeze_mask_0, x = read_state_3)[name = string("v_cache_31_cast_fp16")]; int32 var_1577 = const()[name = string("op_1577"), val = int32(-1)]; tensor var_1595_axes_0 = const()[name = string("op_1595_axes_0"), val = tensor([-1])]; tensor blocks_7_attn_ln_weight_to_fp16 = const()[name = string("blocks_7_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198847040)))]; tensor blocks_7_attn_ln_bias_to_fp16 = const()[name = string("blocks_7_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198848640)))]; fp16 var_1583_to_fp16 = const()[name = string("op_1583_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_1595_cast_fp16 = layer_norm(axes = var_1595_axes_0, beta = blocks_7_attn_ln_bias_to_fp16, epsilon = var_1583_to_fp16, gamma = blocks_7_attn_ln_weight_to_fp16, x = x_129_cast_fp16)[name = string("op_1595_cast_fp16")]; tensor var_1606_to_fp16 = const()[name = string("op_1606_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198850240)))]; tensor var_1607_to_fp16 = const()[name = string("op_1607_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200029952)))]; tensor linear_56_cast_fp16 = linear(bias = var_1607_to_fp16, weight = var_1606_to_fp16, x = var_1595_cast_fp16)[name = string("linear_56_cast_fp16")]; tensor var_1610_to_fp16 = const()[name = string("op_1610_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200031552)))]; tensor linear_57_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1610_to_fp16, x = var_1595_cast_fp16)[name = string("linear_57_cast_fp16")]; tensor var_1614_to_fp16 = const()[name = string("op_1614_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201211264)))]; tensor var_1615_to_fp16 = const()[name = string("op_1615_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(202390976)))]; tensor linear_58_cast_fp16 = linear(bias = var_1615_to_fp16, weight = var_1614_to_fp16, x = var_1595_cast_fp16)[name = string("linear_58_cast_fp16")]; tensor var_1617_shape_cast_fp16 = shape(x = linear_56_cast_fp16)[name = string("op_1617_shape_cast_fp16")]; int32 gather_86_axis_0 = const()[name = string("gather_86_axis_0"), val = int32(0)]; int32 gather_86_batch_dims_0 = const()[name = string("gather_86_batch_dims_0"), val = int32(0)]; bool gather_86_validate_indices_0 = const()[name = string("gather_86_validate_indices_0"), val = bool(false)]; string var_1617_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1617_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_86_to_uint16 = const()[name = string("select_86_to_uint16"), val = uint16(1)]; tensor var_1617_shape_cast_fp16_to_uint16 = cast(dtype = var_1617_shape_cast_fp16_to_uint16_dtype_0, x = var_1617_shape_cast_fp16)[name = string("cast_136")]; uint16 gather_86_cast_uint16 = gather(axis = gather_86_axis_0, batch_dims = gather_86_batch_dims_0, indices = select_86_to_uint16, validate_indices = gather_86_validate_indices_0, x = var_1617_shape_cast_fp16_to_uint16)[name = string("gather_86_cast_uint16")]; string gather_86_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_86_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_86_cast_uint16_to_int32 = cast(dtype = gather_86_cast_uint16_to_int32_dtype_0, x = gather_86_cast_uint16)[name = string("cast_135")]; int32 end_step_17 = add(x = offset, y = gather_86_cast_uint16_to_int32)[name = string("end_step_17")]; tensor expand_dims_112 = const()[name = string("expand_dims_112"), val = tensor([0])]; tensor expand_dims_114 = const()[name = string("expand_dims_114"), val = tensor([0])]; tensor expand_dims_115_axes_0 = const()[name = string("expand_dims_115_axes_0"), val = tensor([0])]; tensor expand_dims_115 = expand_dims(axes = expand_dims_115_axes_0, x = end_step_17)[name = string("expand_dims_115")]; tensor concat_158_values0_0 = const()[name = string("concat_158_values0_0"), val = tensor([7])]; int32 concat_158_axis_0 = const()[name = string("concat_158_axis_0"), val = int32(0)]; bool concat_158_interleave_0 = const()[name = string("concat_158_interleave_0"), val = bool(false)]; tensor concat_158 = concat(axis = concat_158_axis_0, interleave = concat_158_interleave_0, values = (concat_158_values0_0, expand_dims_112, expand_dims_1, expand_dims_114))[name = string("concat_158")]; tensor concat_159_values0_0 = const()[name = string("concat_159_values0_0"), val = tensor([0])]; tensor concat_159_values1_0 = const()[name = string("concat_159_values1_0"), val = tensor([0])]; tensor concat_159_values3_0 = const()[name = string("concat_159_values3_0"), val = tensor([0])]; int32 concat_159_axis_0 = const()[name = string("concat_159_axis_0"), val = int32(0)]; bool concat_159_interleave_0 = const()[name = string("concat_159_interleave_0"), val = bool(false)]; tensor concat_159 = concat(axis = concat_159_axis_0, interleave = concat_159_interleave_0, values = (concat_159_values0_0, concat_159_values1_0, expand_dims_115, concat_159_values3_0))[name = string("concat_159")]; tensor k_cache1_internal_tensor_assign_8_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_8_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_8_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_158, begin_mask = k_cache1_internal_tensor_assign_8_begin_mask_0, end = concat_159, end_mask = k_cache1_internal_tensor_assign_8_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_8_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_8_stride_0, update = linear_57_cast_fp16, x = coreml_update_state_36)[name = string("k_cache1_internal_tensor_assign_8_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_8_cast_fp16, input = k_cache1)[name = string("coreml_update_state_38_write_state")]; tensor coreml_update_state_38 = read_state(input = k_cache1)[name = string("coreml_update_state_38")]; tensor v_cache1_internal_tensor_assign_8_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_8_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_8_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_158, begin_mask = v_cache1_internal_tensor_assign_8_begin_mask_0, end = concat_159, end_mask = v_cache1_internal_tensor_assign_8_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_8_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_8_stride_0, update = linear_58_cast_fp16, x = coreml_update_state_37)[name = string("v_cache1_internal_tensor_assign_8_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_8_cast_fp16, input = v_cache1)[name = string("coreml_update_state_39_write_state")]; tensor coreml_update_state_39 = read_state(input = v_cache1)[name = string("coreml_update_state_39")]; int32 concat_164_values0_0 = const()[name = string("concat_164_values0_0"), val = int32(1)]; int32 concat_164_values2_0 = const()[name = string("concat_164_values2_0"), val = int32(768)]; int32 concat_164_axis_0 = const()[name = string("concat_164_axis_0"), val = int32(0)]; bool concat_164_interleave_0 = const()[name = string("concat_164_interleave_0"), val = bool(false)]; tensor concat_164 = concat(axis = concat_164_axis_0, interleave = concat_164_interleave_0, values = (concat_164_values0_0, end_step_17, concat_164_values2_0))[name = string("concat_164")]; tensor var_1633_begin_0 = const()[name = string("op_1633_begin_0"), val = tensor([0, 0, 0])]; tensor var_1633_end_mask_0 = const()[name = string("op_1633_end_mask_0"), val = tensor([true, false, true])]; tensor var_1633_cast_fp16 = slice_by_index(begin = var_1633_begin_0, end = concat_164, end_mask = var_1633_end_mask_0, x = k_cache_29_cast_fp16)[name = string("op_1633_cast_fp16")]; tensor var_1636_begin_0 = const()[name = string("op_1636_begin_0"), val = tensor([0, 0, 0])]; tensor var_1636_end_mask_0 = const()[name = string("op_1636_end_mask_0"), val = tensor([true, false, true])]; tensor var_1636_cast_fp16 = slice_by_index(begin = var_1636_begin_0, end = concat_164, end_mask = var_1636_end_mask_0, x = v_cache_29_cast_fp16)[name = string("op_1636_cast_fp16")]; tensor concat_166x = const()[name = string("concat_166x"), val = tensor([1, -1, 12, 64])]; tensor var_1646_cast_fp16 = reshape(shape = concat_166x, x = linear_56_cast_fp16)[name = string("op_1646_cast_fp16")]; tensor const_88_to_fp16 = const()[name = string("const_88_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_59_cast_fp16 = mul(x = var_1646_cast_fp16, y = const_88_to_fp16)[name = string("q_59_cast_fp16")]; tensor concat_167x = const()[name = string("concat_167x"), val = tensor([1, -1, 12, 64])]; tensor var_1653_cast_fp16 = reshape(shape = concat_167x, x = var_1633_cast_fp16)[name = string("op_1653_cast_fp16")]; tensor const_89_to_fp16 = const()[name = string("const_89_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_75_cast_fp16 = mul(x = var_1653_cast_fp16, y = const_89_to_fp16)[name = string("k_75_cast_fp16")]; tensor concat_168x = const()[name = string("concat_168x"), val = tensor([1, -1, 12, 64])]; tensor var_1660_cast_fp16 = reshape(shape = concat_168x, x = var_1636_cast_fp16)[name = string("op_1660_cast_fp16")]; tensor var_1661 = const()[name = string("op_1661"), val = tensor([0, 2, 1, 3])]; bool qk_43_transpose_x_0 = const()[name = string("qk_43_transpose_x_0"), val = bool(false)]; bool qk_43_transpose_y_0 = const()[name = string("qk_43_transpose_y_0"), val = bool(false)]; tensor transpose_125_perm_0 = const()[name = string("transpose_125_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_126_perm_0 = const()[name = string("transpose_126_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_126 = transpose(perm = transpose_126_perm_0, x = k_75_cast_fp16)[name = string("transpose_182")]; tensor transpose_125 = transpose(perm = transpose_125_perm_0, x = q_59_cast_fp16)[name = string("transpose_183")]; tensor qk_43_cast_fp16 = matmul(transpose_x = qk_43_transpose_x_0, transpose_y = qk_43_transpose_y_0, x = transpose_125, y = transpose_126)[name = string("qk_43_cast_fp16")]; int32 concat_169_values1_0 = const()[name = string("concat_169_values1_0"), val = int32(448)]; int32 concat_169_axis_0 = const()[name = string("concat_169_axis_0"), val = int32(0)]; bool concat_169_interleave_0 = const()[name = string("concat_169_interleave_0"), val = bool(false)]; tensor concat_169 = concat(axis = concat_169_axis_0, interleave = concat_169_interleave_0, values = (gather_86_cast_uint16_to_int32, concat_169_values1_0))[name = string("concat_169")]; tensor var_1664_begin_0 = const()[name = string("op_1664_begin_0"), val = tensor([0, 0])]; tensor var_1664_end_mask_0 = const()[name = string("op_1664_end_mask_0"), val = tensor([false, true])]; tensor var_1664_cast_fp16 = slice_by_index(begin = var_1664_begin_0, end = concat_169, end_mask = var_1664_end_mask_0, x = mask_to_fp16)[name = string("op_1664_cast_fp16")]; int32 concat_170_values0_0 = const()[name = string("concat_170_values0_0"), val = int32(0)]; int32 concat_170_axis_0 = const()[name = string("concat_170_axis_0"), val = int32(0)]; bool concat_170_interleave_0 = const()[name = string("concat_170_interleave_0"), val = bool(false)]; tensor concat_170 = concat(axis = concat_170_axis_0, interleave = concat_170_interleave_0, values = (concat_170_values0_0, gather_86_cast_uint16_to_int32))[name = string("concat_170")]; tensor var_1665_begin_0 = const()[name = string("op_1665_begin_0"), val = tensor([0, 0])]; tensor var_1665_end_mask_0 = const()[name = string("op_1665_end_mask_0"), val = tensor([true, false])]; tensor var_1665_cast_fp16 = slice_by_index(begin = var_1665_begin_0, end = concat_170, end_mask = var_1665_end_mask_0, x = var_1664_cast_fp16)[name = string("op_1665_cast_fp16")]; tensor qk_45_cast_fp16 = add(x = qk_43_cast_fp16, y = var_1665_cast_fp16)[name = string("qk_45_cast_fp16")]; tensor var_1668_cast_fp16 = softmax(axis = var_1577, x = qk_45_cast_fp16)[name = string("op_1668_cast_fp16")]; bool var_1670_transpose_x_0 = const()[name = string("op_1670_transpose_x_0"), val = bool(false)]; bool var_1670_transpose_y_0 = const()[name = string("op_1670_transpose_y_0"), val = bool(false)]; tensor v_75_cast_fp16 = transpose(perm = var_1661, x = var_1660_cast_fp16)[name = string("transpose_184")]; tensor var_1670_cast_fp16 = matmul(transpose_x = var_1670_transpose_x_0, transpose_y = var_1670_transpose_y_0, x = var_1668_cast_fp16, y = v_75_cast_fp16)[name = string("op_1670_cast_fp16")]; tensor var_1671 = const()[name = string("op_1671"), val = tensor([0, 2, 1, 3])]; tensor concat_171x = const()[name = string("concat_171x"), val = tensor([1, -1, 768])]; tensor var_1672_cast_fp16 = transpose(perm = var_1671, x = var_1670_cast_fp16)[name = string("transpose_181")]; tensor x_133_cast_fp16 = reshape(shape = concat_171x, x = var_1672_cast_fp16)[name = string("x_133_cast_fp16")]; tensor var_1676_to_fp16 = const()[name = string("op_1676_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(202392576)))]; tensor var_1677_to_fp16 = const()[name = string("op_1677_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203572288)))]; tensor linear_59_cast_fp16 = linear(bias = var_1677_to_fp16, weight = var_1676_to_fp16, x = x_133_cast_fp16)[name = string("linear_59_cast_fp16")]; tensor x_135_cast_fp16 = add(x = x_129_cast_fp16, y = linear_59_cast_fp16)[name = string("x_135_cast_fp16")]; tensor var_1684_axes_0 = const()[name = string("op_1684_axes_0"), val = tensor([-1])]; tensor blocks_7_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_7_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203573888)))]; tensor blocks_7_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_7_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203575488)))]; tensor var_1684_cast_fp16 = layer_norm(axes = var_1684_axes_0, beta = blocks_7_cross_attn_ln_bias_to_fp16, epsilon = var_1583_to_fp16, gamma = blocks_7_cross_attn_ln_weight_to_fp16, x = x_135_cast_fp16)[name = string("op_1684_cast_fp16")]; tensor var_1693_to_fp16 = const()[name = string("op_1693_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203577088)))]; tensor var_1694_to_fp16 = const()[name = string("op_1694_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204756800)))]; tensor linear_60_cast_fp16 = linear(bias = var_1694_to_fp16, weight = var_1693_to_fp16, x = var_1684_cast_fp16)[name = string("linear_60_cast_fp16")]; tensor concat_172 = const()[name = string("concat_172"), val = tensor([0, 0, 0])]; tensor concat_173 = const()[name = string("concat_173"), val = tensor([0, 1500, 0])]; tensor k_77_internal_tensor_assign_1_stride_0 = const()[name = string("k_77_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_77_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_77_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_77_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_77_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_77_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_77_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_77_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_172, begin_mask = k_77_internal_tensor_assign_1_begin_mask_0, end = concat_173, end_mask = k_77_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_77_internal_tensor_assign_1_squeeze_mask_0, stride = k_77_internal_tensor_assign_1_stride_0, update = k_cache_31_cast_fp16, x = k_7_to_fp16)[name = string("k_77_internal_tensor_assign_1_cast_fp16")]; tensor concat_174 = const()[name = string("concat_174"), val = tensor([0, 0, 0])]; tensor concat_175 = const()[name = string("concat_175"), val = tensor([0, 1500, 0])]; tensor v_77_internal_tensor_assign_1_stride_0 = const()[name = string("v_77_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_77_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_77_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_77_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_77_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_77_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_77_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_77_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_174, begin_mask = v_77_internal_tensor_assign_1_begin_mask_0, end = concat_175, end_mask = v_77_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_77_internal_tensor_assign_1_squeeze_mask_0, stride = v_77_internal_tensor_assign_1_stride_0, update = v_cache_31_cast_fp16, x = k_7_to_fp16)[name = string("v_77_internal_tensor_assign_1_cast_fp16")]; tensor concat_176x = const()[name = string("concat_176x"), val = tensor([1, -1, 12, 64])]; tensor var_1714_cast_fp16 = reshape(shape = concat_176x, x = linear_60_cast_fp16)[name = string("op_1714_cast_fp16")]; tensor const_90_to_fp16 = const()[name = string("const_90_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_63_cast_fp16 = mul(x = var_1714_cast_fp16, y = const_90_to_fp16)[name = string("q_63_cast_fp16")]; tensor var_1720 = const()[name = string("op_1720"), val = tensor([1, 1500, 12, -1])]; tensor var_1721_cast_fp16 = reshape(shape = var_1720, x = k_77_internal_tensor_assign_1_cast_fp16)[name = string("op_1721_cast_fp16")]; tensor const_91_to_fp16 = const()[name = string("const_91_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_79_cast_fp16 = mul(x = var_1721_cast_fp16, y = const_91_to_fp16)[name = string("k_79_cast_fp16")]; tensor var_1727 = const()[name = string("op_1727"), val = tensor([1, 1500, 12, -1])]; tensor var_1728_cast_fp16 = reshape(shape = var_1727, x = v_77_internal_tensor_assign_1_cast_fp16)[name = string("op_1728_cast_fp16")]; tensor var_1729 = const()[name = string("op_1729"), val = tensor([0, 2, 1, 3])]; bool qk_47_transpose_x_0 = const()[name = string("qk_47_transpose_x_0"), val = bool(false)]; bool qk_47_transpose_y_0 = const()[name = string("qk_47_transpose_y_0"), val = bool(false)]; tensor transpose_127_perm_0 = const()[name = string("transpose_127_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_128_perm_0 = const()[name = string("transpose_128_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_128 = transpose(perm = transpose_128_perm_0, x = k_79_cast_fp16)[name = string("transpose_178")]; tensor transpose_127 = transpose(perm = transpose_127_perm_0, x = q_63_cast_fp16)[name = string("transpose_179")]; tensor qk_47_cast_fp16 = matmul(transpose_x = qk_47_transpose_x_0, transpose_y = qk_47_transpose_y_0, x = transpose_127, y = transpose_128)[name = string("qk_47_cast_fp16")]; tensor var_1733_cast_fp16 = softmax(axis = var_1577, x = qk_47_cast_fp16)[name = string("op_1733_cast_fp16")]; bool var_1735_transpose_x_0 = const()[name = string("op_1735_transpose_x_0"), val = bool(false)]; bool var_1735_transpose_y_0 = const()[name = string("op_1735_transpose_y_0"), val = bool(false)]; tensor v_79_cast_fp16 = transpose(perm = var_1729, x = var_1728_cast_fp16)[name = string("transpose_180")]; tensor var_1735_cast_fp16 = matmul(transpose_x = var_1735_transpose_x_0, transpose_y = var_1735_transpose_y_0, x = var_1733_cast_fp16, y = v_79_cast_fp16)[name = string("op_1735_cast_fp16")]; tensor var_1736 = const()[name = string("op_1736"), val = tensor([0, 2, 1, 3])]; tensor concat_177x = const()[name = string("concat_177x"), val = tensor([1, -1, 768])]; tensor var_1737_cast_fp16 = transpose(perm = var_1736, x = var_1735_cast_fp16)[name = string("transpose_177")]; tensor x_139_cast_fp16 = reshape(shape = concat_177x, x = var_1737_cast_fp16)[name = string("x_139_cast_fp16")]; tensor var_1741_to_fp16 = const()[name = string("op_1741_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204758400)))]; tensor var_1742_to_fp16 = const()[name = string("op_1742_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205938112)))]; tensor linear_61_cast_fp16 = linear(bias = var_1742_to_fp16, weight = var_1741_to_fp16, x = x_139_cast_fp16)[name = string("linear_61_cast_fp16")]; tensor x_141_cast_fp16 = add(x = x_135_cast_fp16, y = linear_61_cast_fp16)[name = string("x_141_cast_fp16")]; tensor var_1749_axes_0 = const()[name = string("op_1749_axes_0"), val = tensor([-1])]; tensor blocks_7_mlp_ln_weight_to_fp16 = const()[name = string("blocks_7_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205939712)))]; tensor blocks_7_mlp_ln_bias_to_fp16 = const()[name = string("blocks_7_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205941312)))]; tensor var_1749_cast_fp16 = layer_norm(axes = var_1749_axes_0, beta = blocks_7_mlp_ln_bias_to_fp16, epsilon = var_1583_to_fp16, gamma = blocks_7_mlp_ln_weight_to_fp16, x = x_141_cast_fp16)[name = string("op_1749_cast_fp16")]; tensor var_1758_to_fp16 = const()[name = string("op_1758_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205942912)))]; tensor var_1759_to_fp16 = const()[name = string("op_1759_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210661568)))]; tensor linear_62_cast_fp16 = linear(bias = var_1759_to_fp16, weight = var_1758_to_fp16, x = var_1749_cast_fp16)[name = string("linear_62_cast_fp16")]; string x_145_mode_0 = const()[name = string("x_145_mode_0"), val = string("EXACT")]; tensor x_145_cast_fp16 = gelu(mode = x_145_mode_0, x = linear_62_cast_fp16)[name = string("x_145_cast_fp16")]; tensor var_1764_to_fp16 = const()[name = string("op_1764_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210667776)))]; tensor var_1765_to_fp16 = const()[name = string("op_1765_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(215386432)))]; tensor linear_63_cast_fp16 = linear(bias = var_1765_to_fp16, weight = var_1764_to_fp16, x = x_145_cast_fp16)[name = string("linear_63_cast_fp16")]; tensor x_147_cast_fp16 = add(x = x_141_cast_fp16, y = linear_63_cast_fp16)[name = string("x_147_cast_fp16")]; tensor k_cache_33_begin_0 = const()[name = string("k_cache_33_begin_0"), val = tensor([8, 0, 0, 0])]; tensor k_cache_33_end_0 = const()[name = string("k_cache_33_end_0"), val = tensor([9, 1, 448, 768])]; tensor k_cache_33_end_mask_0 = const()[name = string("k_cache_33_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_33_squeeze_mask_0 = const()[name = string("k_cache_33_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_33_cast_fp16 = slice_by_index(begin = k_cache_33_begin_0, end = k_cache_33_end_0, end_mask = k_cache_33_end_mask_0, squeeze_mask = k_cache_33_squeeze_mask_0, x = coreml_update_state_38)[name = string("k_cache_33_cast_fp16")]; tensor v_cache_33_begin_0 = const()[name = string("v_cache_33_begin_0"), val = tensor([8, 0, 0, 0])]; tensor v_cache_33_end_0 = const()[name = string("v_cache_33_end_0"), val = tensor([9, 1, 448, 768])]; tensor v_cache_33_end_mask_0 = const()[name = string("v_cache_33_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_33_squeeze_mask_0 = const()[name = string("v_cache_33_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_33_cast_fp16 = slice_by_index(begin = v_cache_33_begin_0, end = v_cache_33_end_0, end_mask = v_cache_33_end_mask_0, squeeze_mask = v_cache_33_squeeze_mask_0, x = coreml_update_state_39)[name = string("v_cache_33_cast_fp16")]; tensor k_cache_35_begin_0 = const()[name = string("k_cache_35_begin_0"), val = tensor([8, 0, 0, 0])]; tensor k_cache_35_end_0 = const()[name = string("k_cache_35_end_0"), val = tensor([9, 1, 1500, 768])]; tensor k_cache_35_end_mask_0 = const()[name = string("k_cache_35_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_35_squeeze_mask_0 = const()[name = string("k_cache_35_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_35_cast_fp16 = slice_by_index(begin = k_cache_35_begin_0, end = k_cache_35_end_0, end_mask = k_cache_35_end_mask_0, squeeze_mask = k_cache_35_squeeze_mask_0, x = read_state_2)[name = string("k_cache_35_cast_fp16")]; tensor v_cache_35_begin_0 = const()[name = string("v_cache_35_begin_0"), val = tensor([8, 0, 0, 0])]; tensor v_cache_35_end_0 = const()[name = string("v_cache_35_end_0"), val = tensor([9, 1, 1500, 768])]; tensor v_cache_35_end_mask_0 = const()[name = string("v_cache_35_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_35_squeeze_mask_0 = const()[name = string("v_cache_35_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_35_cast_fp16 = slice_by_index(begin = v_cache_35_begin_0, end = v_cache_35_end_0, end_mask = v_cache_35_end_mask_0, squeeze_mask = v_cache_35_squeeze_mask_0, x = read_state_3)[name = string("v_cache_35_cast_fp16")]; int32 var_1788 = const()[name = string("op_1788"), val = int32(-1)]; tensor var_1806_axes_0 = const()[name = string("op_1806_axes_0"), val = tensor([-1])]; tensor blocks_8_attn_ln_weight_to_fp16 = const()[name = string("blocks_8_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(215388032)))]; tensor blocks_8_attn_ln_bias_to_fp16 = const()[name = string("blocks_8_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(215389632)))]; fp16 var_1794_to_fp16 = const()[name = string("op_1794_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_1806_cast_fp16 = layer_norm(axes = var_1806_axes_0, beta = blocks_8_attn_ln_bias_to_fp16, epsilon = var_1794_to_fp16, gamma = blocks_8_attn_ln_weight_to_fp16, x = x_147_cast_fp16)[name = string("op_1806_cast_fp16")]; tensor var_1817_to_fp16 = const()[name = string("op_1817_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(215391232)))]; tensor var_1818_to_fp16 = const()[name = string("op_1818_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216570944)))]; tensor linear_64_cast_fp16 = linear(bias = var_1818_to_fp16, weight = var_1817_to_fp16, x = var_1806_cast_fp16)[name = string("linear_64_cast_fp16")]; tensor var_1821_to_fp16 = const()[name = string("op_1821_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216572544)))]; tensor linear_65_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1821_to_fp16, x = var_1806_cast_fp16)[name = string("linear_65_cast_fp16")]; tensor var_1825_to_fp16 = const()[name = string("op_1825_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217752256)))]; tensor var_1826_to_fp16 = const()[name = string("op_1826_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218931968)))]; tensor linear_66_cast_fp16 = linear(bias = var_1826_to_fp16, weight = var_1825_to_fp16, x = var_1806_cast_fp16)[name = string("linear_66_cast_fp16")]; tensor var_1828_shape_cast_fp16 = shape(x = linear_64_cast_fp16)[name = string("op_1828_shape_cast_fp16")]; int32 gather_98_axis_0 = const()[name = string("gather_98_axis_0"), val = int32(0)]; int32 gather_98_batch_dims_0 = const()[name = string("gather_98_batch_dims_0"), val = int32(0)]; bool gather_98_validate_indices_0 = const()[name = string("gather_98_validate_indices_0"), val = bool(false)]; string var_1828_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1828_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_98_to_uint16 = const()[name = string("select_98_to_uint16"), val = uint16(1)]; tensor var_1828_shape_cast_fp16_to_uint16 = cast(dtype = var_1828_shape_cast_fp16_to_uint16_dtype_0, x = var_1828_shape_cast_fp16)[name = string("cast_134")]; uint16 gather_98_cast_uint16 = gather(axis = gather_98_axis_0, batch_dims = gather_98_batch_dims_0, indices = select_98_to_uint16, validate_indices = gather_98_validate_indices_0, x = var_1828_shape_cast_fp16_to_uint16)[name = string("gather_98_cast_uint16")]; string gather_98_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_98_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_98_cast_uint16_to_int32 = cast(dtype = gather_98_cast_uint16_to_int32_dtype_0, x = gather_98_cast_uint16)[name = string("cast_133")]; int32 end_step_19 = add(x = offset, y = gather_98_cast_uint16_to_int32)[name = string("end_step_19")]; tensor expand_dims_128 = const()[name = string("expand_dims_128"), val = tensor([0])]; tensor expand_dims_130 = const()[name = string("expand_dims_130"), val = tensor([0])]; tensor expand_dims_131_axes_0 = const()[name = string("expand_dims_131_axes_0"), val = tensor([0])]; tensor expand_dims_131 = expand_dims(axes = expand_dims_131_axes_0, x = end_step_19)[name = string("expand_dims_131")]; tensor concat_180_values0_0 = const()[name = string("concat_180_values0_0"), val = tensor([8])]; int32 concat_180_axis_0 = const()[name = string("concat_180_axis_0"), val = int32(0)]; bool concat_180_interleave_0 = const()[name = string("concat_180_interleave_0"), val = bool(false)]; tensor concat_180 = concat(axis = concat_180_axis_0, interleave = concat_180_interleave_0, values = (concat_180_values0_0, expand_dims_128, expand_dims_1, expand_dims_130))[name = string("concat_180")]; tensor concat_181_values0_0 = const()[name = string("concat_181_values0_0"), val = tensor([0])]; tensor concat_181_values1_0 = const()[name = string("concat_181_values1_0"), val = tensor([0])]; tensor concat_181_values3_0 = const()[name = string("concat_181_values3_0"), val = tensor([0])]; int32 concat_181_axis_0 = const()[name = string("concat_181_axis_0"), val = int32(0)]; bool concat_181_interleave_0 = const()[name = string("concat_181_interleave_0"), val = bool(false)]; tensor concat_181 = concat(axis = concat_181_axis_0, interleave = concat_181_interleave_0, values = (concat_181_values0_0, concat_181_values1_0, expand_dims_131, concat_181_values3_0))[name = string("concat_181")]; tensor k_cache1_internal_tensor_assign_9_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_9_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_9_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_180, begin_mask = k_cache1_internal_tensor_assign_9_begin_mask_0, end = concat_181, end_mask = k_cache1_internal_tensor_assign_9_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_9_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_9_stride_0, update = linear_65_cast_fp16, x = coreml_update_state_38)[name = string("k_cache1_internal_tensor_assign_9_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_9_cast_fp16, input = k_cache1)[name = string("coreml_update_state_40_write_state")]; tensor coreml_update_state_40 = read_state(input = k_cache1)[name = string("coreml_update_state_40")]; tensor v_cache1_internal_tensor_assign_9_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_9_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_9_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_180, begin_mask = v_cache1_internal_tensor_assign_9_begin_mask_0, end = concat_181, end_mask = v_cache1_internal_tensor_assign_9_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_9_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_9_stride_0, update = linear_66_cast_fp16, x = coreml_update_state_39)[name = string("v_cache1_internal_tensor_assign_9_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_9_cast_fp16, input = v_cache1)[name = string("coreml_update_state_41_write_state")]; tensor coreml_update_state_41 = read_state(input = v_cache1)[name = string("coreml_update_state_41")]; int32 concat_186_values0_0 = const()[name = string("concat_186_values0_0"), val = int32(1)]; int32 concat_186_values2_0 = const()[name = string("concat_186_values2_0"), val = int32(768)]; int32 concat_186_axis_0 = const()[name = string("concat_186_axis_0"), val = int32(0)]; bool concat_186_interleave_0 = const()[name = string("concat_186_interleave_0"), val = bool(false)]; tensor concat_186 = concat(axis = concat_186_axis_0, interleave = concat_186_interleave_0, values = (concat_186_values0_0, end_step_19, concat_186_values2_0))[name = string("concat_186")]; tensor var_1844_begin_0 = const()[name = string("op_1844_begin_0"), val = tensor([0, 0, 0])]; tensor var_1844_end_mask_0 = const()[name = string("op_1844_end_mask_0"), val = tensor([true, false, true])]; tensor var_1844_cast_fp16 = slice_by_index(begin = var_1844_begin_0, end = concat_186, end_mask = var_1844_end_mask_0, x = k_cache_33_cast_fp16)[name = string("op_1844_cast_fp16")]; tensor var_1847_begin_0 = const()[name = string("op_1847_begin_0"), val = tensor([0, 0, 0])]; tensor var_1847_end_mask_0 = const()[name = string("op_1847_end_mask_0"), val = tensor([true, false, true])]; tensor var_1847_cast_fp16 = slice_by_index(begin = var_1847_begin_0, end = concat_186, end_mask = var_1847_end_mask_0, x = v_cache_33_cast_fp16)[name = string("op_1847_cast_fp16")]; tensor concat_188x = const()[name = string("concat_188x"), val = tensor([1, -1, 12, 64])]; tensor var_1857_cast_fp16 = reshape(shape = concat_188x, x = linear_64_cast_fp16)[name = string("op_1857_cast_fp16")]; tensor const_92_to_fp16 = const()[name = string("const_92_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_67_cast_fp16 = mul(x = var_1857_cast_fp16, y = const_92_to_fp16)[name = string("q_67_cast_fp16")]; tensor concat_189x = const()[name = string("concat_189x"), val = tensor([1, -1, 12, 64])]; tensor var_1864_cast_fp16 = reshape(shape = concat_189x, x = var_1844_cast_fp16)[name = string("op_1864_cast_fp16")]; tensor const_93_to_fp16 = const()[name = string("const_93_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_85_cast_fp16 = mul(x = var_1864_cast_fp16, y = const_93_to_fp16)[name = string("k_85_cast_fp16")]; tensor concat_190x = const()[name = string("concat_190x"), val = tensor([1, -1, 12, 64])]; tensor var_1871_cast_fp16 = reshape(shape = concat_190x, x = var_1847_cast_fp16)[name = string("op_1871_cast_fp16")]; tensor var_1872 = const()[name = string("op_1872"), val = tensor([0, 2, 1, 3])]; bool qk_49_transpose_x_0 = const()[name = string("qk_49_transpose_x_0"), val = bool(false)]; bool qk_49_transpose_y_0 = const()[name = string("qk_49_transpose_y_0"), val = bool(false)]; tensor transpose_129_perm_0 = const()[name = string("transpose_129_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_130_perm_0 = const()[name = string("transpose_130_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_130 = transpose(perm = transpose_130_perm_0, x = k_85_cast_fp16)[name = string("transpose_174")]; tensor transpose_129 = transpose(perm = transpose_129_perm_0, x = q_67_cast_fp16)[name = string("transpose_175")]; tensor qk_49_cast_fp16 = matmul(transpose_x = qk_49_transpose_x_0, transpose_y = qk_49_transpose_y_0, x = transpose_129, y = transpose_130)[name = string("qk_49_cast_fp16")]; int32 concat_191_values1_0 = const()[name = string("concat_191_values1_0"), val = int32(448)]; int32 concat_191_axis_0 = const()[name = string("concat_191_axis_0"), val = int32(0)]; bool concat_191_interleave_0 = const()[name = string("concat_191_interleave_0"), val = bool(false)]; tensor concat_191 = concat(axis = concat_191_axis_0, interleave = concat_191_interleave_0, values = (gather_98_cast_uint16_to_int32, concat_191_values1_0))[name = string("concat_191")]; tensor var_1875_begin_0 = const()[name = string("op_1875_begin_0"), val = tensor([0, 0])]; tensor var_1875_end_mask_0 = const()[name = string("op_1875_end_mask_0"), val = tensor([false, true])]; tensor var_1875_cast_fp16 = slice_by_index(begin = var_1875_begin_0, end = concat_191, end_mask = var_1875_end_mask_0, x = mask_to_fp16)[name = string("op_1875_cast_fp16")]; int32 concat_192_values0_0 = const()[name = string("concat_192_values0_0"), val = int32(0)]; int32 concat_192_axis_0 = const()[name = string("concat_192_axis_0"), val = int32(0)]; bool concat_192_interleave_0 = const()[name = string("concat_192_interleave_0"), val = bool(false)]; tensor concat_192 = concat(axis = concat_192_axis_0, interleave = concat_192_interleave_0, values = (concat_192_values0_0, gather_98_cast_uint16_to_int32))[name = string("concat_192")]; tensor var_1876_begin_0 = const()[name = string("op_1876_begin_0"), val = tensor([0, 0])]; tensor var_1876_end_mask_0 = const()[name = string("op_1876_end_mask_0"), val = tensor([true, false])]; tensor var_1876_cast_fp16 = slice_by_index(begin = var_1876_begin_0, end = concat_192, end_mask = var_1876_end_mask_0, x = var_1875_cast_fp16)[name = string("op_1876_cast_fp16")]; tensor qk_51_cast_fp16 = add(x = qk_49_cast_fp16, y = var_1876_cast_fp16)[name = string("qk_51_cast_fp16")]; tensor var_1879_cast_fp16 = softmax(axis = var_1788, x = qk_51_cast_fp16)[name = string("op_1879_cast_fp16")]; bool var_1881_transpose_x_0 = const()[name = string("op_1881_transpose_x_0"), val = bool(false)]; bool var_1881_transpose_y_0 = const()[name = string("op_1881_transpose_y_0"), val = bool(false)]; tensor v_85_cast_fp16 = transpose(perm = var_1872, x = var_1871_cast_fp16)[name = string("transpose_176")]; tensor var_1881_cast_fp16 = matmul(transpose_x = var_1881_transpose_x_0, transpose_y = var_1881_transpose_y_0, x = var_1879_cast_fp16, y = v_85_cast_fp16)[name = string("op_1881_cast_fp16")]; tensor var_1882 = const()[name = string("op_1882"), val = tensor([0, 2, 1, 3])]; tensor concat_193x = const()[name = string("concat_193x"), val = tensor([1, -1, 768])]; tensor var_1883_cast_fp16 = transpose(perm = var_1882, x = var_1881_cast_fp16)[name = string("transpose_173")]; tensor x_151_cast_fp16 = reshape(shape = concat_193x, x = var_1883_cast_fp16)[name = string("x_151_cast_fp16")]; tensor var_1887_to_fp16 = const()[name = string("op_1887_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218933568)))]; tensor var_1888_to_fp16 = const()[name = string("op_1888_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220113280)))]; tensor linear_67_cast_fp16 = linear(bias = var_1888_to_fp16, weight = var_1887_to_fp16, x = x_151_cast_fp16)[name = string("linear_67_cast_fp16")]; tensor x_153_cast_fp16 = add(x = x_147_cast_fp16, y = linear_67_cast_fp16)[name = string("x_153_cast_fp16")]; tensor var_1895_axes_0 = const()[name = string("op_1895_axes_0"), val = tensor([-1])]; tensor blocks_8_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_8_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220114880)))]; tensor blocks_8_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_8_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220116480)))]; tensor var_1895_cast_fp16 = layer_norm(axes = var_1895_axes_0, beta = blocks_8_cross_attn_ln_bias_to_fp16, epsilon = var_1794_to_fp16, gamma = blocks_8_cross_attn_ln_weight_to_fp16, x = x_153_cast_fp16)[name = string("op_1895_cast_fp16")]; tensor var_1904_to_fp16 = const()[name = string("op_1904_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220118080)))]; tensor var_1905_to_fp16 = const()[name = string("op_1905_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(221297792)))]; tensor linear_68_cast_fp16 = linear(bias = var_1905_to_fp16, weight = var_1904_to_fp16, x = var_1895_cast_fp16)[name = string("linear_68_cast_fp16")]; tensor concat_194 = const()[name = string("concat_194"), val = tensor([0, 0, 0])]; tensor concat_195 = const()[name = string("concat_195"), val = tensor([0, 1500, 0])]; tensor k_87_internal_tensor_assign_1_stride_0 = const()[name = string("k_87_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_87_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_87_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_87_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_87_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_87_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_87_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_87_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_194, begin_mask = k_87_internal_tensor_assign_1_begin_mask_0, end = concat_195, end_mask = k_87_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_87_internal_tensor_assign_1_squeeze_mask_0, stride = k_87_internal_tensor_assign_1_stride_0, update = k_cache_35_cast_fp16, x = k_7_to_fp16)[name = string("k_87_internal_tensor_assign_1_cast_fp16")]; tensor concat_196 = const()[name = string("concat_196"), val = tensor([0, 0, 0])]; tensor concat_197 = const()[name = string("concat_197"), val = tensor([0, 1500, 0])]; tensor v_87_internal_tensor_assign_1_stride_0 = const()[name = string("v_87_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_87_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_87_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_87_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_87_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_87_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_87_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_87_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_196, begin_mask = v_87_internal_tensor_assign_1_begin_mask_0, end = concat_197, end_mask = v_87_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_87_internal_tensor_assign_1_squeeze_mask_0, stride = v_87_internal_tensor_assign_1_stride_0, update = v_cache_35_cast_fp16, x = k_7_to_fp16)[name = string("v_87_internal_tensor_assign_1_cast_fp16")]; tensor concat_198x = const()[name = string("concat_198x"), val = tensor([1, -1, 12, 64])]; tensor var_1925_cast_fp16 = reshape(shape = concat_198x, x = linear_68_cast_fp16)[name = string("op_1925_cast_fp16")]; tensor const_94_to_fp16 = const()[name = string("const_94_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_71_cast_fp16 = mul(x = var_1925_cast_fp16, y = const_94_to_fp16)[name = string("q_71_cast_fp16")]; tensor var_1931 = const()[name = string("op_1931"), val = tensor([1, 1500, 12, -1])]; tensor var_1932_cast_fp16 = reshape(shape = var_1931, x = k_87_internal_tensor_assign_1_cast_fp16)[name = string("op_1932_cast_fp16")]; tensor const_95_to_fp16 = const()[name = string("const_95_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_89_cast_fp16 = mul(x = var_1932_cast_fp16, y = const_95_to_fp16)[name = string("k_89_cast_fp16")]; tensor var_1938 = const()[name = string("op_1938"), val = tensor([1, 1500, 12, -1])]; tensor var_1939_cast_fp16 = reshape(shape = var_1938, x = v_87_internal_tensor_assign_1_cast_fp16)[name = string("op_1939_cast_fp16")]; tensor var_1940 = const()[name = string("op_1940"), val = tensor([0, 2, 1, 3])]; bool qk_53_transpose_x_0 = const()[name = string("qk_53_transpose_x_0"), val = bool(false)]; bool qk_53_transpose_y_0 = const()[name = string("qk_53_transpose_y_0"), val = bool(false)]; tensor transpose_131_perm_0 = const()[name = string("transpose_131_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_132_perm_0 = const()[name = string("transpose_132_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_132 = transpose(perm = transpose_132_perm_0, x = k_89_cast_fp16)[name = string("transpose_170")]; tensor transpose_131 = transpose(perm = transpose_131_perm_0, x = q_71_cast_fp16)[name = string("transpose_171")]; tensor qk_53_cast_fp16 = matmul(transpose_x = qk_53_transpose_x_0, transpose_y = qk_53_transpose_y_0, x = transpose_131, y = transpose_132)[name = string("qk_53_cast_fp16")]; tensor var_1944_cast_fp16 = softmax(axis = var_1788, x = qk_53_cast_fp16)[name = string("op_1944_cast_fp16")]; bool var_1946_transpose_x_0 = const()[name = string("op_1946_transpose_x_0"), val = bool(false)]; bool var_1946_transpose_y_0 = const()[name = string("op_1946_transpose_y_0"), val = bool(false)]; tensor v_89_cast_fp16 = transpose(perm = var_1940, x = var_1939_cast_fp16)[name = string("transpose_172")]; tensor var_1946_cast_fp16 = matmul(transpose_x = var_1946_transpose_x_0, transpose_y = var_1946_transpose_y_0, x = var_1944_cast_fp16, y = v_89_cast_fp16)[name = string("op_1946_cast_fp16")]; tensor var_1947 = const()[name = string("op_1947"), val = tensor([0, 2, 1, 3])]; tensor concat_199x = const()[name = string("concat_199x"), val = tensor([1, -1, 768])]; tensor var_1948_cast_fp16 = transpose(perm = var_1947, x = var_1946_cast_fp16)[name = string("transpose_169")]; tensor x_157_cast_fp16 = reshape(shape = concat_199x, x = var_1948_cast_fp16)[name = string("x_157_cast_fp16")]; tensor var_1952_to_fp16 = const()[name = string("op_1952_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(221299392)))]; tensor var_1953_to_fp16 = const()[name = string("op_1953_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222479104)))]; tensor linear_69_cast_fp16 = linear(bias = var_1953_to_fp16, weight = var_1952_to_fp16, x = x_157_cast_fp16)[name = string("linear_69_cast_fp16")]; tensor x_159_cast_fp16 = add(x = x_153_cast_fp16, y = linear_69_cast_fp16)[name = string("x_159_cast_fp16")]; tensor var_1960_axes_0 = const()[name = string("op_1960_axes_0"), val = tensor([-1])]; tensor blocks_8_mlp_ln_weight_to_fp16 = const()[name = string("blocks_8_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222480704)))]; tensor blocks_8_mlp_ln_bias_to_fp16 = const()[name = string("blocks_8_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222482304)))]; tensor var_1960_cast_fp16 = layer_norm(axes = var_1960_axes_0, beta = blocks_8_mlp_ln_bias_to_fp16, epsilon = var_1794_to_fp16, gamma = blocks_8_mlp_ln_weight_to_fp16, x = x_159_cast_fp16)[name = string("op_1960_cast_fp16")]; tensor var_1969_to_fp16 = const()[name = string("op_1969_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222483904)))]; tensor var_1970_to_fp16 = const()[name = string("op_1970_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227202560)))]; tensor linear_70_cast_fp16 = linear(bias = var_1970_to_fp16, weight = var_1969_to_fp16, x = var_1960_cast_fp16)[name = string("linear_70_cast_fp16")]; string x_163_mode_0 = const()[name = string("x_163_mode_0"), val = string("EXACT")]; tensor x_163_cast_fp16 = gelu(mode = x_163_mode_0, x = linear_70_cast_fp16)[name = string("x_163_cast_fp16")]; tensor var_1975_to_fp16 = const()[name = string("op_1975_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227208768)))]; tensor var_1976_to_fp16 = const()[name = string("op_1976_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(231927424)))]; tensor linear_71_cast_fp16 = linear(bias = var_1976_to_fp16, weight = var_1975_to_fp16, x = x_163_cast_fp16)[name = string("linear_71_cast_fp16")]; tensor x_165_cast_fp16 = add(x = x_159_cast_fp16, y = linear_71_cast_fp16)[name = string("x_165_cast_fp16")]; tensor k_cache_37_begin_0 = const()[name = string("k_cache_37_begin_0"), val = tensor([9, 0, 0, 0])]; tensor k_cache_37_end_0 = const()[name = string("k_cache_37_end_0"), val = tensor([10, 1, 448, 768])]; tensor k_cache_37_end_mask_0 = const()[name = string("k_cache_37_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_37_squeeze_mask_0 = const()[name = string("k_cache_37_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_37_cast_fp16 = slice_by_index(begin = k_cache_37_begin_0, end = k_cache_37_end_0, end_mask = k_cache_37_end_mask_0, squeeze_mask = k_cache_37_squeeze_mask_0, x = coreml_update_state_40)[name = string("k_cache_37_cast_fp16")]; tensor v_cache_37_begin_0 = const()[name = string("v_cache_37_begin_0"), val = tensor([9, 0, 0, 0])]; tensor v_cache_37_end_0 = const()[name = string("v_cache_37_end_0"), val = tensor([10, 1, 448, 768])]; tensor v_cache_37_end_mask_0 = const()[name = string("v_cache_37_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_37_squeeze_mask_0 = const()[name = string("v_cache_37_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_37_cast_fp16 = slice_by_index(begin = v_cache_37_begin_0, end = v_cache_37_end_0, end_mask = v_cache_37_end_mask_0, squeeze_mask = v_cache_37_squeeze_mask_0, x = coreml_update_state_41)[name = string("v_cache_37_cast_fp16")]; tensor k_cache_39_begin_0 = const()[name = string("k_cache_39_begin_0"), val = tensor([9, 0, 0, 0])]; tensor k_cache_39_end_0 = const()[name = string("k_cache_39_end_0"), val = tensor([10, 1, 1500, 768])]; tensor k_cache_39_end_mask_0 = const()[name = string("k_cache_39_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_39_squeeze_mask_0 = const()[name = string("k_cache_39_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_39_cast_fp16 = slice_by_index(begin = k_cache_39_begin_0, end = k_cache_39_end_0, end_mask = k_cache_39_end_mask_0, squeeze_mask = k_cache_39_squeeze_mask_0, x = read_state_2)[name = string("k_cache_39_cast_fp16")]; tensor v_cache_39_begin_0 = const()[name = string("v_cache_39_begin_0"), val = tensor([9, 0, 0, 0])]; tensor v_cache_39_end_0 = const()[name = string("v_cache_39_end_0"), val = tensor([10, 1, 1500, 768])]; tensor v_cache_39_end_mask_0 = const()[name = string("v_cache_39_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_39_squeeze_mask_0 = const()[name = string("v_cache_39_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_39_cast_fp16 = slice_by_index(begin = v_cache_39_begin_0, end = v_cache_39_end_0, end_mask = v_cache_39_end_mask_0, squeeze_mask = v_cache_39_squeeze_mask_0, x = read_state_3)[name = string("v_cache_39_cast_fp16")]; int32 var_1999 = const()[name = string("op_1999"), val = int32(-1)]; tensor var_2017_axes_0 = const()[name = string("op_2017_axes_0"), val = tensor([-1])]; tensor blocks_9_attn_ln_weight_to_fp16 = const()[name = string("blocks_9_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(231929024)))]; tensor blocks_9_attn_ln_bias_to_fp16 = const()[name = string("blocks_9_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(231930624)))]; fp16 var_2005_to_fp16 = const()[name = string("op_2005_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_2017_cast_fp16 = layer_norm(axes = var_2017_axes_0, beta = blocks_9_attn_ln_bias_to_fp16, epsilon = var_2005_to_fp16, gamma = blocks_9_attn_ln_weight_to_fp16, x = x_165_cast_fp16)[name = string("op_2017_cast_fp16")]; tensor var_2028_to_fp16 = const()[name = string("op_2028_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(231932224)))]; tensor var_2029_to_fp16 = const()[name = string("op_2029_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233111936)))]; tensor linear_72_cast_fp16 = linear(bias = var_2029_to_fp16, weight = var_2028_to_fp16, x = var_2017_cast_fp16)[name = string("linear_72_cast_fp16")]; tensor var_2032_to_fp16 = const()[name = string("op_2032_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233113536)))]; tensor linear_73_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2032_to_fp16, x = var_2017_cast_fp16)[name = string("linear_73_cast_fp16")]; tensor var_2036_to_fp16 = const()[name = string("op_2036_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(234293248)))]; tensor var_2037_to_fp16 = const()[name = string("op_2037_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(235472960)))]; tensor linear_74_cast_fp16 = linear(bias = var_2037_to_fp16, weight = var_2036_to_fp16, x = var_2017_cast_fp16)[name = string("linear_74_cast_fp16")]; tensor var_2039_shape_cast_fp16 = shape(x = linear_72_cast_fp16)[name = string("op_2039_shape_cast_fp16")]; int32 gather_110_axis_0 = const()[name = string("gather_110_axis_0"), val = int32(0)]; int32 gather_110_batch_dims_0 = const()[name = string("gather_110_batch_dims_0"), val = int32(0)]; bool gather_110_validate_indices_0 = const()[name = string("gather_110_validate_indices_0"), val = bool(false)]; string var_2039_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2039_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_110_to_uint16 = const()[name = string("select_110_to_uint16"), val = uint16(1)]; tensor var_2039_shape_cast_fp16_to_uint16 = cast(dtype = var_2039_shape_cast_fp16_to_uint16_dtype_0, x = var_2039_shape_cast_fp16)[name = string("cast_132")]; uint16 gather_110_cast_uint16 = gather(axis = gather_110_axis_0, batch_dims = gather_110_batch_dims_0, indices = select_110_to_uint16, validate_indices = gather_110_validate_indices_0, x = var_2039_shape_cast_fp16_to_uint16)[name = string("gather_110_cast_uint16")]; string gather_110_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_110_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_110_cast_uint16_to_int32 = cast(dtype = gather_110_cast_uint16_to_int32_dtype_0, x = gather_110_cast_uint16)[name = string("cast_131")]; int32 end_step_21 = add(x = offset, y = gather_110_cast_uint16_to_int32)[name = string("end_step_21")]; tensor expand_dims_144 = const()[name = string("expand_dims_144"), val = tensor([0])]; tensor expand_dims_146 = const()[name = string("expand_dims_146"), val = tensor([0])]; tensor expand_dims_147_axes_0 = const()[name = string("expand_dims_147_axes_0"), val = tensor([0])]; tensor expand_dims_147 = expand_dims(axes = expand_dims_147_axes_0, x = end_step_21)[name = string("expand_dims_147")]; tensor concat_202_values0_0 = const()[name = string("concat_202_values0_0"), val = tensor([9])]; int32 concat_202_axis_0 = const()[name = string("concat_202_axis_0"), val = int32(0)]; bool concat_202_interleave_0 = const()[name = string("concat_202_interleave_0"), val = bool(false)]; tensor concat_202 = concat(axis = concat_202_axis_0, interleave = concat_202_interleave_0, values = (concat_202_values0_0, expand_dims_144, expand_dims_1, expand_dims_146))[name = string("concat_202")]; tensor concat_203_values0_0 = const()[name = string("concat_203_values0_0"), val = tensor([0])]; tensor concat_203_values1_0 = const()[name = string("concat_203_values1_0"), val = tensor([0])]; tensor concat_203_values3_0 = const()[name = string("concat_203_values3_0"), val = tensor([0])]; int32 concat_203_axis_0 = const()[name = string("concat_203_axis_0"), val = int32(0)]; bool concat_203_interleave_0 = const()[name = string("concat_203_interleave_0"), val = bool(false)]; tensor concat_203 = concat(axis = concat_203_axis_0, interleave = concat_203_interleave_0, values = (concat_203_values0_0, concat_203_values1_0, expand_dims_147, concat_203_values3_0))[name = string("concat_203")]; tensor k_cache1_internal_tensor_assign_10_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_10_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_10_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_202, begin_mask = k_cache1_internal_tensor_assign_10_begin_mask_0, end = concat_203, end_mask = k_cache1_internal_tensor_assign_10_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_10_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_10_stride_0, update = linear_73_cast_fp16, x = coreml_update_state_40)[name = string("k_cache1_internal_tensor_assign_10_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_10_cast_fp16, input = k_cache1)[name = string("coreml_update_state_42_write_state")]; tensor coreml_update_state_42 = read_state(input = k_cache1)[name = string("coreml_update_state_42")]; tensor v_cache1_internal_tensor_assign_10_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_10_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_10_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_202, begin_mask = v_cache1_internal_tensor_assign_10_begin_mask_0, end = concat_203, end_mask = v_cache1_internal_tensor_assign_10_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_10_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_10_stride_0, update = linear_74_cast_fp16, x = coreml_update_state_41)[name = string("v_cache1_internal_tensor_assign_10_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_10_cast_fp16, input = v_cache1)[name = string("coreml_update_state_43_write_state")]; tensor coreml_update_state_43 = read_state(input = v_cache1)[name = string("coreml_update_state_43")]; int32 concat_208_values0_0 = const()[name = string("concat_208_values0_0"), val = int32(1)]; int32 concat_208_values2_0 = const()[name = string("concat_208_values2_0"), val = int32(768)]; int32 concat_208_axis_0 = const()[name = string("concat_208_axis_0"), val = int32(0)]; bool concat_208_interleave_0 = const()[name = string("concat_208_interleave_0"), val = bool(false)]; tensor concat_208 = concat(axis = concat_208_axis_0, interleave = concat_208_interleave_0, values = (concat_208_values0_0, end_step_21, concat_208_values2_0))[name = string("concat_208")]; tensor var_2055_begin_0 = const()[name = string("op_2055_begin_0"), val = tensor([0, 0, 0])]; tensor var_2055_end_mask_0 = const()[name = string("op_2055_end_mask_0"), val = tensor([true, false, true])]; tensor var_2055_cast_fp16 = slice_by_index(begin = var_2055_begin_0, end = concat_208, end_mask = var_2055_end_mask_0, x = k_cache_37_cast_fp16)[name = string("op_2055_cast_fp16")]; tensor var_2058_begin_0 = const()[name = string("op_2058_begin_0"), val = tensor([0, 0, 0])]; tensor var_2058_end_mask_0 = const()[name = string("op_2058_end_mask_0"), val = tensor([true, false, true])]; tensor var_2058_cast_fp16 = slice_by_index(begin = var_2058_begin_0, end = concat_208, end_mask = var_2058_end_mask_0, x = v_cache_37_cast_fp16)[name = string("op_2058_cast_fp16")]; tensor concat_210x = const()[name = string("concat_210x"), val = tensor([1, -1, 12, 64])]; tensor var_2068_cast_fp16 = reshape(shape = concat_210x, x = linear_72_cast_fp16)[name = string("op_2068_cast_fp16")]; tensor const_96_to_fp16 = const()[name = string("const_96_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_75_cast_fp16 = mul(x = var_2068_cast_fp16, y = const_96_to_fp16)[name = string("q_75_cast_fp16")]; tensor concat_211x = const()[name = string("concat_211x"), val = tensor([1, -1, 12, 64])]; tensor var_2075_cast_fp16 = reshape(shape = concat_211x, x = var_2055_cast_fp16)[name = string("op_2075_cast_fp16")]; tensor const_97_to_fp16 = const()[name = string("const_97_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_95_cast_fp16 = mul(x = var_2075_cast_fp16, y = const_97_to_fp16)[name = string("k_95_cast_fp16")]; tensor concat_212x = const()[name = string("concat_212x"), val = tensor([1, -1, 12, 64])]; tensor var_2082_cast_fp16 = reshape(shape = concat_212x, x = var_2058_cast_fp16)[name = string("op_2082_cast_fp16")]; tensor var_2083 = const()[name = string("op_2083"), val = tensor([0, 2, 1, 3])]; bool qk_55_transpose_x_0 = const()[name = string("qk_55_transpose_x_0"), val = bool(false)]; bool qk_55_transpose_y_0 = const()[name = string("qk_55_transpose_y_0"), val = bool(false)]; tensor transpose_133_perm_0 = const()[name = string("transpose_133_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_134_perm_0 = const()[name = string("transpose_134_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_134 = transpose(perm = transpose_134_perm_0, x = k_95_cast_fp16)[name = string("transpose_166")]; tensor transpose_133 = transpose(perm = transpose_133_perm_0, x = q_75_cast_fp16)[name = string("transpose_167")]; tensor qk_55_cast_fp16 = matmul(transpose_x = qk_55_transpose_x_0, transpose_y = qk_55_transpose_y_0, x = transpose_133, y = transpose_134)[name = string("qk_55_cast_fp16")]; int32 concat_213_values1_0 = const()[name = string("concat_213_values1_0"), val = int32(448)]; int32 concat_213_axis_0 = const()[name = string("concat_213_axis_0"), val = int32(0)]; bool concat_213_interleave_0 = const()[name = string("concat_213_interleave_0"), val = bool(false)]; tensor concat_213 = concat(axis = concat_213_axis_0, interleave = concat_213_interleave_0, values = (gather_110_cast_uint16_to_int32, concat_213_values1_0))[name = string("concat_213")]; tensor var_2086_begin_0 = const()[name = string("op_2086_begin_0"), val = tensor([0, 0])]; tensor var_2086_end_mask_0 = const()[name = string("op_2086_end_mask_0"), val = tensor([false, true])]; tensor var_2086_cast_fp16 = slice_by_index(begin = var_2086_begin_0, end = concat_213, end_mask = var_2086_end_mask_0, x = mask_to_fp16)[name = string("op_2086_cast_fp16")]; int32 concat_214_values0_0 = const()[name = string("concat_214_values0_0"), val = int32(0)]; int32 concat_214_axis_0 = const()[name = string("concat_214_axis_0"), val = int32(0)]; bool concat_214_interleave_0 = const()[name = string("concat_214_interleave_0"), val = bool(false)]; tensor concat_214 = concat(axis = concat_214_axis_0, interleave = concat_214_interleave_0, values = (concat_214_values0_0, gather_110_cast_uint16_to_int32))[name = string("concat_214")]; tensor var_2087_begin_0 = const()[name = string("op_2087_begin_0"), val = tensor([0, 0])]; tensor var_2087_end_mask_0 = const()[name = string("op_2087_end_mask_0"), val = tensor([true, false])]; tensor var_2087_cast_fp16 = slice_by_index(begin = var_2087_begin_0, end = concat_214, end_mask = var_2087_end_mask_0, x = var_2086_cast_fp16)[name = string("op_2087_cast_fp16")]; tensor qk_57_cast_fp16 = add(x = qk_55_cast_fp16, y = var_2087_cast_fp16)[name = string("qk_57_cast_fp16")]; tensor var_2090_cast_fp16 = softmax(axis = var_1999, x = qk_57_cast_fp16)[name = string("op_2090_cast_fp16")]; bool var_2092_transpose_x_0 = const()[name = string("op_2092_transpose_x_0"), val = bool(false)]; bool var_2092_transpose_y_0 = const()[name = string("op_2092_transpose_y_0"), val = bool(false)]; tensor v_95_cast_fp16 = transpose(perm = var_2083, x = var_2082_cast_fp16)[name = string("transpose_168")]; tensor var_2092_cast_fp16 = matmul(transpose_x = var_2092_transpose_x_0, transpose_y = var_2092_transpose_y_0, x = var_2090_cast_fp16, y = v_95_cast_fp16)[name = string("op_2092_cast_fp16")]; tensor var_2093 = const()[name = string("op_2093"), val = tensor([0, 2, 1, 3])]; tensor concat_215x = const()[name = string("concat_215x"), val = tensor([1, -1, 768])]; tensor var_2094_cast_fp16 = transpose(perm = var_2093, x = var_2092_cast_fp16)[name = string("transpose_165")]; tensor x_169_cast_fp16 = reshape(shape = concat_215x, x = var_2094_cast_fp16)[name = string("x_169_cast_fp16")]; tensor var_2098_to_fp16 = const()[name = string("op_2098_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(235474560)))]; tensor var_2099_to_fp16 = const()[name = string("op_2099_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236654272)))]; tensor linear_75_cast_fp16 = linear(bias = var_2099_to_fp16, weight = var_2098_to_fp16, x = x_169_cast_fp16)[name = string("linear_75_cast_fp16")]; tensor x_171_cast_fp16 = add(x = x_165_cast_fp16, y = linear_75_cast_fp16)[name = string("x_171_cast_fp16")]; tensor var_2106_axes_0 = const()[name = string("op_2106_axes_0"), val = tensor([-1])]; tensor blocks_9_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_9_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236655872)))]; tensor blocks_9_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_9_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236657472)))]; tensor var_2106_cast_fp16 = layer_norm(axes = var_2106_axes_0, beta = blocks_9_cross_attn_ln_bias_to_fp16, epsilon = var_2005_to_fp16, gamma = blocks_9_cross_attn_ln_weight_to_fp16, x = x_171_cast_fp16)[name = string("op_2106_cast_fp16")]; tensor var_2115_to_fp16 = const()[name = string("op_2115_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236659072)))]; tensor var_2116_to_fp16 = const()[name = string("op_2116_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(237838784)))]; tensor linear_76_cast_fp16 = linear(bias = var_2116_to_fp16, weight = var_2115_to_fp16, x = var_2106_cast_fp16)[name = string("linear_76_cast_fp16")]; tensor concat_216 = const()[name = string("concat_216"), val = tensor([0, 0, 0])]; tensor concat_217 = const()[name = string("concat_217"), val = tensor([0, 1500, 0])]; tensor k_97_internal_tensor_assign_1_stride_0 = const()[name = string("k_97_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_97_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_97_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_97_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_97_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_97_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_97_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_97_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_216, begin_mask = k_97_internal_tensor_assign_1_begin_mask_0, end = concat_217, end_mask = k_97_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_97_internal_tensor_assign_1_squeeze_mask_0, stride = k_97_internal_tensor_assign_1_stride_0, update = k_cache_39_cast_fp16, x = k_7_to_fp16)[name = string("k_97_internal_tensor_assign_1_cast_fp16")]; tensor concat_218 = const()[name = string("concat_218"), val = tensor([0, 0, 0])]; tensor concat_219 = const()[name = string("concat_219"), val = tensor([0, 1500, 0])]; tensor v_97_internal_tensor_assign_1_stride_0 = const()[name = string("v_97_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_97_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_97_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_97_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_97_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_97_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_97_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_97_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_218, begin_mask = v_97_internal_tensor_assign_1_begin_mask_0, end = concat_219, end_mask = v_97_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_97_internal_tensor_assign_1_squeeze_mask_0, stride = v_97_internal_tensor_assign_1_stride_0, update = v_cache_39_cast_fp16, x = k_7_to_fp16)[name = string("v_97_internal_tensor_assign_1_cast_fp16")]; tensor concat_220x = const()[name = string("concat_220x"), val = tensor([1, -1, 12, 64])]; tensor var_2136_cast_fp16 = reshape(shape = concat_220x, x = linear_76_cast_fp16)[name = string("op_2136_cast_fp16")]; tensor const_98_to_fp16 = const()[name = string("const_98_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_79_cast_fp16 = mul(x = var_2136_cast_fp16, y = const_98_to_fp16)[name = string("q_79_cast_fp16")]; tensor var_2142 = const()[name = string("op_2142"), val = tensor([1, 1500, 12, -1])]; tensor var_2143_cast_fp16 = reshape(shape = var_2142, x = k_97_internal_tensor_assign_1_cast_fp16)[name = string("op_2143_cast_fp16")]; tensor const_99_to_fp16 = const()[name = string("const_99_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_99_cast_fp16 = mul(x = var_2143_cast_fp16, y = const_99_to_fp16)[name = string("k_99_cast_fp16")]; tensor var_2149 = const()[name = string("op_2149"), val = tensor([1, 1500, 12, -1])]; tensor var_2150_cast_fp16 = reshape(shape = var_2149, x = v_97_internal_tensor_assign_1_cast_fp16)[name = string("op_2150_cast_fp16")]; tensor var_2151 = const()[name = string("op_2151"), val = tensor([0, 2, 1, 3])]; bool qk_59_transpose_x_0 = const()[name = string("qk_59_transpose_x_0"), val = bool(false)]; bool qk_59_transpose_y_0 = const()[name = string("qk_59_transpose_y_0"), val = bool(false)]; tensor transpose_135_perm_0 = const()[name = string("transpose_135_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_136_perm_0 = const()[name = string("transpose_136_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_136 = transpose(perm = transpose_136_perm_0, x = k_99_cast_fp16)[name = string("transpose_162")]; tensor transpose_135 = transpose(perm = transpose_135_perm_0, x = q_79_cast_fp16)[name = string("transpose_163")]; tensor qk_59_cast_fp16 = matmul(transpose_x = qk_59_transpose_x_0, transpose_y = qk_59_transpose_y_0, x = transpose_135, y = transpose_136)[name = string("qk_59_cast_fp16")]; tensor var_2155_cast_fp16 = softmax(axis = var_1999, x = qk_59_cast_fp16)[name = string("op_2155_cast_fp16")]; bool var_2157_transpose_x_0 = const()[name = string("op_2157_transpose_x_0"), val = bool(false)]; bool var_2157_transpose_y_0 = const()[name = string("op_2157_transpose_y_0"), val = bool(false)]; tensor v_99_cast_fp16 = transpose(perm = var_2151, x = var_2150_cast_fp16)[name = string("transpose_164")]; tensor var_2157_cast_fp16 = matmul(transpose_x = var_2157_transpose_x_0, transpose_y = var_2157_transpose_y_0, x = var_2155_cast_fp16, y = v_99_cast_fp16)[name = string("op_2157_cast_fp16")]; tensor var_2158 = const()[name = string("op_2158"), val = tensor([0, 2, 1, 3])]; tensor concat_221x = const()[name = string("concat_221x"), val = tensor([1, -1, 768])]; tensor var_2159_cast_fp16 = transpose(perm = var_2158, x = var_2157_cast_fp16)[name = string("transpose_161")]; tensor x_175_cast_fp16 = reshape(shape = concat_221x, x = var_2159_cast_fp16)[name = string("x_175_cast_fp16")]; tensor var_2163_to_fp16 = const()[name = string("op_2163_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(237840384)))]; tensor var_2164_to_fp16 = const()[name = string("op_2164_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239020096)))]; tensor linear_77_cast_fp16 = linear(bias = var_2164_to_fp16, weight = var_2163_to_fp16, x = x_175_cast_fp16)[name = string("linear_77_cast_fp16")]; tensor x_177_cast_fp16 = add(x = x_171_cast_fp16, y = linear_77_cast_fp16)[name = string("x_177_cast_fp16")]; tensor var_2171_axes_0 = const()[name = string("op_2171_axes_0"), val = tensor([-1])]; tensor blocks_9_mlp_ln_weight_to_fp16 = const()[name = string("blocks_9_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239021696)))]; tensor blocks_9_mlp_ln_bias_to_fp16 = const()[name = string("blocks_9_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239023296)))]; tensor var_2171_cast_fp16 = layer_norm(axes = var_2171_axes_0, beta = blocks_9_mlp_ln_bias_to_fp16, epsilon = var_2005_to_fp16, gamma = blocks_9_mlp_ln_weight_to_fp16, x = x_177_cast_fp16)[name = string("op_2171_cast_fp16")]; tensor var_2180_to_fp16 = const()[name = string("op_2180_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239024896)))]; tensor var_2181_to_fp16 = const()[name = string("op_2181_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243743552)))]; tensor linear_78_cast_fp16 = linear(bias = var_2181_to_fp16, weight = var_2180_to_fp16, x = var_2171_cast_fp16)[name = string("linear_78_cast_fp16")]; string x_181_mode_0 = const()[name = string("x_181_mode_0"), val = string("EXACT")]; tensor x_181_cast_fp16 = gelu(mode = x_181_mode_0, x = linear_78_cast_fp16)[name = string("x_181_cast_fp16")]; tensor var_2186_to_fp16 = const()[name = string("op_2186_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243749760)))]; tensor var_2187_to_fp16 = const()[name = string("op_2187_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(248468416)))]; tensor linear_79_cast_fp16 = linear(bias = var_2187_to_fp16, weight = var_2186_to_fp16, x = x_181_cast_fp16)[name = string("linear_79_cast_fp16")]; tensor x_183_cast_fp16 = add(x = x_177_cast_fp16, y = linear_79_cast_fp16)[name = string("x_183_cast_fp16")]; tensor k_cache_41_begin_0 = const()[name = string("k_cache_41_begin_0"), val = tensor([10, 0, 0, 0])]; tensor k_cache_41_end_0 = const()[name = string("k_cache_41_end_0"), val = tensor([11, 1, 448, 768])]; tensor k_cache_41_end_mask_0 = const()[name = string("k_cache_41_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_41_squeeze_mask_0 = const()[name = string("k_cache_41_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_41_cast_fp16 = slice_by_index(begin = k_cache_41_begin_0, end = k_cache_41_end_0, end_mask = k_cache_41_end_mask_0, squeeze_mask = k_cache_41_squeeze_mask_0, x = coreml_update_state_42)[name = string("k_cache_41_cast_fp16")]; tensor v_cache_41_begin_0 = const()[name = string("v_cache_41_begin_0"), val = tensor([10, 0, 0, 0])]; tensor v_cache_41_end_0 = const()[name = string("v_cache_41_end_0"), val = tensor([11, 1, 448, 768])]; tensor v_cache_41_end_mask_0 = const()[name = string("v_cache_41_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_41_squeeze_mask_0 = const()[name = string("v_cache_41_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_41_cast_fp16 = slice_by_index(begin = v_cache_41_begin_0, end = v_cache_41_end_0, end_mask = v_cache_41_end_mask_0, squeeze_mask = v_cache_41_squeeze_mask_0, x = coreml_update_state_43)[name = string("v_cache_41_cast_fp16")]; tensor k_cache_43_begin_0 = const()[name = string("k_cache_43_begin_0"), val = tensor([10, 0, 0, 0])]; tensor k_cache_43_end_0 = const()[name = string("k_cache_43_end_0"), val = tensor([11, 1, 1500, 768])]; tensor k_cache_43_end_mask_0 = const()[name = string("k_cache_43_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_43_squeeze_mask_0 = const()[name = string("k_cache_43_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_43_cast_fp16 = slice_by_index(begin = k_cache_43_begin_0, end = k_cache_43_end_0, end_mask = k_cache_43_end_mask_0, squeeze_mask = k_cache_43_squeeze_mask_0, x = read_state_2)[name = string("k_cache_43_cast_fp16")]; tensor v_cache_43_begin_0 = const()[name = string("v_cache_43_begin_0"), val = tensor([10, 0, 0, 0])]; tensor v_cache_43_end_0 = const()[name = string("v_cache_43_end_0"), val = tensor([11, 1, 1500, 768])]; tensor v_cache_43_end_mask_0 = const()[name = string("v_cache_43_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_43_squeeze_mask_0 = const()[name = string("v_cache_43_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_43_cast_fp16 = slice_by_index(begin = v_cache_43_begin_0, end = v_cache_43_end_0, end_mask = v_cache_43_end_mask_0, squeeze_mask = v_cache_43_squeeze_mask_0, x = read_state_3)[name = string("v_cache_43_cast_fp16")]; int32 var_2210 = const()[name = string("op_2210"), val = int32(-1)]; tensor var_2228_axes_0 = const()[name = string("op_2228_axes_0"), val = tensor([-1])]; tensor blocks_10_attn_ln_weight_to_fp16 = const()[name = string("blocks_10_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(248470016)))]; tensor blocks_10_attn_ln_bias_to_fp16 = const()[name = string("blocks_10_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(248471616)))]; fp16 var_2216_to_fp16 = const()[name = string("op_2216_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_2228_cast_fp16 = layer_norm(axes = var_2228_axes_0, beta = blocks_10_attn_ln_bias_to_fp16, epsilon = var_2216_to_fp16, gamma = blocks_10_attn_ln_weight_to_fp16, x = x_183_cast_fp16)[name = string("op_2228_cast_fp16")]; tensor var_2239_to_fp16 = const()[name = string("op_2239_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(248473216)))]; tensor var_2240_to_fp16 = const()[name = string("op_2240_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249652928)))]; tensor linear_80_cast_fp16 = linear(bias = var_2240_to_fp16, weight = var_2239_to_fp16, x = var_2228_cast_fp16)[name = string("linear_80_cast_fp16")]; tensor var_2243_to_fp16 = const()[name = string("op_2243_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249654528)))]; tensor linear_81_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2243_to_fp16, x = var_2228_cast_fp16)[name = string("linear_81_cast_fp16")]; tensor var_2247_to_fp16 = const()[name = string("op_2247_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(250834240)))]; tensor var_2248_to_fp16 = const()[name = string("op_2248_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(252013952)))]; tensor linear_82_cast_fp16 = linear(bias = var_2248_to_fp16, weight = var_2247_to_fp16, x = var_2228_cast_fp16)[name = string("linear_82_cast_fp16")]; tensor var_2250_shape_cast_fp16 = shape(x = linear_80_cast_fp16)[name = string("op_2250_shape_cast_fp16")]; int32 gather_122_axis_0 = const()[name = string("gather_122_axis_0"), val = int32(0)]; int32 gather_122_batch_dims_0 = const()[name = string("gather_122_batch_dims_0"), val = int32(0)]; bool gather_122_validate_indices_0 = const()[name = string("gather_122_validate_indices_0"), val = bool(false)]; string var_2250_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2250_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_122_to_uint16 = const()[name = string("select_122_to_uint16"), val = uint16(1)]; tensor var_2250_shape_cast_fp16_to_uint16 = cast(dtype = var_2250_shape_cast_fp16_to_uint16_dtype_0, x = var_2250_shape_cast_fp16)[name = string("cast_130")]; uint16 gather_122_cast_uint16 = gather(axis = gather_122_axis_0, batch_dims = gather_122_batch_dims_0, indices = select_122_to_uint16, validate_indices = gather_122_validate_indices_0, x = var_2250_shape_cast_fp16_to_uint16)[name = string("gather_122_cast_uint16")]; string gather_122_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_122_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_122_cast_uint16_to_int32 = cast(dtype = gather_122_cast_uint16_to_int32_dtype_0, x = gather_122_cast_uint16)[name = string("cast_129")]; int32 end_step_23 = add(x = offset, y = gather_122_cast_uint16_to_int32)[name = string("end_step_23")]; tensor expand_dims_160 = const()[name = string("expand_dims_160"), val = tensor([0])]; tensor expand_dims_162 = const()[name = string("expand_dims_162"), val = tensor([0])]; tensor expand_dims_163_axes_0 = const()[name = string("expand_dims_163_axes_0"), val = tensor([0])]; tensor expand_dims_163 = expand_dims(axes = expand_dims_163_axes_0, x = end_step_23)[name = string("expand_dims_163")]; tensor concat_224_values0_0 = const()[name = string("concat_224_values0_0"), val = tensor([10])]; int32 concat_224_axis_0 = const()[name = string("concat_224_axis_0"), val = int32(0)]; bool concat_224_interleave_0 = const()[name = string("concat_224_interleave_0"), val = bool(false)]; tensor concat_224 = concat(axis = concat_224_axis_0, interleave = concat_224_interleave_0, values = (concat_224_values0_0, expand_dims_160, expand_dims_1, expand_dims_162))[name = string("concat_224")]; tensor concat_225_values0_0 = const()[name = string("concat_225_values0_0"), val = tensor([0])]; tensor concat_225_values1_0 = const()[name = string("concat_225_values1_0"), val = tensor([0])]; tensor concat_225_values3_0 = const()[name = string("concat_225_values3_0"), val = tensor([0])]; int32 concat_225_axis_0 = const()[name = string("concat_225_axis_0"), val = int32(0)]; bool concat_225_interleave_0 = const()[name = string("concat_225_interleave_0"), val = bool(false)]; tensor concat_225 = concat(axis = concat_225_axis_0, interleave = concat_225_interleave_0, values = (concat_225_values0_0, concat_225_values1_0, expand_dims_163, concat_225_values3_0))[name = string("concat_225")]; tensor k_cache1_internal_tensor_assign_11_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_11_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_11_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_224, begin_mask = k_cache1_internal_tensor_assign_11_begin_mask_0, end = concat_225, end_mask = k_cache1_internal_tensor_assign_11_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_11_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_11_stride_0, update = linear_81_cast_fp16, x = coreml_update_state_42)[name = string("k_cache1_internal_tensor_assign_11_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_11_cast_fp16, input = k_cache1)[name = string("coreml_update_state_44_write_state")]; tensor coreml_update_state_44 = read_state(input = k_cache1)[name = string("coreml_update_state_44")]; tensor v_cache1_internal_tensor_assign_11_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_11_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_11_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_224, begin_mask = v_cache1_internal_tensor_assign_11_begin_mask_0, end = concat_225, end_mask = v_cache1_internal_tensor_assign_11_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_11_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_11_stride_0, update = linear_82_cast_fp16, x = coreml_update_state_43)[name = string("v_cache1_internal_tensor_assign_11_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_11_cast_fp16, input = v_cache1)[name = string("coreml_update_state_45_write_state")]; tensor coreml_update_state_45 = read_state(input = v_cache1)[name = string("coreml_update_state_45")]; int32 concat_230_values0_0 = const()[name = string("concat_230_values0_0"), val = int32(1)]; int32 concat_230_values2_0 = const()[name = string("concat_230_values2_0"), val = int32(768)]; int32 concat_230_axis_0 = const()[name = string("concat_230_axis_0"), val = int32(0)]; bool concat_230_interleave_0 = const()[name = string("concat_230_interleave_0"), val = bool(false)]; tensor concat_230 = concat(axis = concat_230_axis_0, interleave = concat_230_interleave_0, values = (concat_230_values0_0, end_step_23, concat_230_values2_0))[name = string("concat_230")]; tensor var_2266_begin_0 = const()[name = string("op_2266_begin_0"), val = tensor([0, 0, 0])]; tensor var_2266_end_mask_0 = const()[name = string("op_2266_end_mask_0"), val = tensor([true, false, true])]; tensor var_2266_cast_fp16 = slice_by_index(begin = var_2266_begin_0, end = concat_230, end_mask = var_2266_end_mask_0, x = k_cache_41_cast_fp16)[name = string("op_2266_cast_fp16")]; tensor var_2269_begin_0 = const()[name = string("op_2269_begin_0"), val = tensor([0, 0, 0])]; tensor var_2269_end_mask_0 = const()[name = string("op_2269_end_mask_0"), val = tensor([true, false, true])]; tensor var_2269_cast_fp16 = slice_by_index(begin = var_2269_begin_0, end = concat_230, end_mask = var_2269_end_mask_0, x = v_cache_41_cast_fp16)[name = string("op_2269_cast_fp16")]; tensor concat_232x = const()[name = string("concat_232x"), val = tensor([1, -1, 12, 64])]; tensor var_2279_cast_fp16 = reshape(shape = concat_232x, x = linear_80_cast_fp16)[name = string("op_2279_cast_fp16")]; tensor const_100_to_fp16 = const()[name = string("const_100_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_83_cast_fp16 = mul(x = var_2279_cast_fp16, y = const_100_to_fp16)[name = string("q_83_cast_fp16")]; tensor concat_233x = const()[name = string("concat_233x"), val = tensor([1, -1, 12, 64])]; tensor var_2286_cast_fp16 = reshape(shape = concat_233x, x = var_2266_cast_fp16)[name = string("op_2286_cast_fp16")]; tensor const_101_to_fp16 = const()[name = string("const_101_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_105_cast_fp16 = mul(x = var_2286_cast_fp16, y = const_101_to_fp16)[name = string("k_105_cast_fp16")]; tensor concat_234x = const()[name = string("concat_234x"), val = tensor([1, -1, 12, 64])]; tensor var_2293_cast_fp16 = reshape(shape = concat_234x, x = var_2269_cast_fp16)[name = string("op_2293_cast_fp16")]; tensor var_2294 = const()[name = string("op_2294"), val = tensor([0, 2, 1, 3])]; bool qk_61_transpose_x_0 = const()[name = string("qk_61_transpose_x_0"), val = bool(false)]; bool qk_61_transpose_y_0 = const()[name = string("qk_61_transpose_y_0"), val = bool(false)]; tensor transpose_137_perm_0 = const()[name = string("transpose_137_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_138_perm_0 = const()[name = string("transpose_138_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_138 = transpose(perm = transpose_138_perm_0, x = k_105_cast_fp16)[name = string("transpose_158")]; tensor transpose_137 = transpose(perm = transpose_137_perm_0, x = q_83_cast_fp16)[name = string("transpose_159")]; tensor qk_61_cast_fp16 = matmul(transpose_x = qk_61_transpose_x_0, transpose_y = qk_61_transpose_y_0, x = transpose_137, y = transpose_138)[name = string("qk_61_cast_fp16")]; int32 concat_235_values1_0 = const()[name = string("concat_235_values1_0"), val = int32(448)]; int32 concat_235_axis_0 = const()[name = string("concat_235_axis_0"), val = int32(0)]; bool concat_235_interleave_0 = const()[name = string("concat_235_interleave_0"), val = bool(false)]; tensor concat_235 = concat(axis = concat_235_axis_0, interleave = concat_235_interleave_0, values = (gather_122_cast_uint16_to_int32, concat_235_values1_0))[name = string("concat_235")]; tensor var_2297_begin_0 = const()[name = string("op_2297_begin_0"), val = tensor([0, 0])]; tensor var_2297_end_mask_0 = const()[name = string("op_2297_end_mask_0"), val = tensor([false, true])]; tensor var_2297_cast_fp16 = slice_by_index(begin = var_2297_begin_0, end = concat_235, end_mask = var_2297_end_mask_0, x = mask_to_fp16)[name = string("op_2297_cast_fp16")]; int32 concat_236_values0_0 = const()[name = string("concat_236_values0_0"), val = int32(0)]; int32 concat_236_axis_0 = const()[name = string("concat_236_axis_0"), val = int32(0)]; bool concat_236_interleave_0 = const()[name = string("concat_236_interleave_0"), val = bool(false)]; tensor concat_236 = concat(axis = concat_236_axis_0, interleave = concat_236_interleave_0, values = (concat_236_values0_0, gather_122_cast_uint16_to_int32))[name = string("concat_236")]; tensor var_2298_begin_0 = const()[name = string("op_2298_begin_0"), val = tensor([0, 0])]; tensor var_2298_end_mask_0 = const()[name = string("op_2298_end_mask_0"), val = tensor([true, false])]; tensor var_2298_cast_fp16 = slice_by_index(begin = var_2298_begin_0, end = concat_236, end_mask = var_2298_end_mask_0, x = var_2297_cast_fp16)[name = string("op_2298_cast_fp16")]; tensor qk_63_cast_fp16 = add(x = qk_61_cast_fp16, y = var_2298_cast_fp16)[name = string("qk_63_cast_fp16")]; tensor var_2301_cast_fp16 = softmax(axis = var_2210, x = qk_63_cast_fp16)[name = string("op_2301_cast_fp16")]; bool var_2303_transpose_x_0 = const()[name = string("op_2303_transpose_x_0"), val = bool(false)]; bool var_2303_transpose_y_0 = const()[name = string("op_2303_transpose_y_0"), val = bool(false)]; tensor v_105_cast_fp16 = transpose(perm = var_2294, x = var_2293_cast_fp16)[name = string("transpose_160")]; tensor var_2303_cast_fp16 = matmul(transpose_x = var_2303_transpose_x_0, transpose_y = var_2303_transpose_y_0, x = var_2301_cast_fp16, y = v_105_cast_fp16)[name = string("op_2303_cast_fp16")]; tensor var_2304 = const()[name = string("op_2304"), val = tensor([0, 2, 1, 3])]; tensor concat_237x = const()[name = string("concat_237x"), val = tensor([1, -1, 768])]; tensor var_2305_cast_fp16 = transpose(perm = var_2304, x = var_2303_cast_fp16)[name = string("transpose_157")]; tensor x_187_cast_fp16 = reshape(shape = concat_237x, x = var_2305_cast_fp16)[name = string("x_187_cast_fp16")]; tensor var_2309_to_fp16 = const()[name = string("op_2309_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(252015552)))]; tensor var_2310_to_fp16 = const()[name = string("op_2310_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253195264)))]; tensor linear_83_cast_fp16 = linear(bias = var_2310_to_fp16, weight = var_2309_to_fp16, x = x_187_cast_fp16)[name = string("linear_83_cast_fp16")]; tensor x_189_cast_fp16 = add(x = x_183_cast_fp16, y = linear_83_cast_fp16)[name = string("x_189_cast_fp16")]; tensor var_2317_axes_0 = const()[name = string("op_2317_axes_0"), val = tensor([-1])]; tensor blocks_10_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_10_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253196864)))]; tensor blocks_10_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_10_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253198464)))]; tensor var_2317_cast_fp16 = layer_norm(axes = var_2317_axes_0, beta = blocks_10_cross_attn_ln_bias_to_fp16, epsilon = var_2216_to_fp16, gamma = blocks_10_cross_attn_ln_weight_to_fp16, x = x_189_cast_fp16)[name = string("op_2317_cast_fp16")]; tensor var_2326_to_fp16 = const()[name = string("op_2326_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253200064)))]; tensor var_2327_to_fp16 = const()[name = string("op_2327_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(254379776)))]; tensor linear_84_cast_fp16 = linear(bias = var_2327_to_fp16, weight = var_2326_to_fp16, x = var_2317_cast_fp16)[name = string("linear_84_cast_fp16")]; tensor concat_238 = const()[name = string("concat_238"), val = tensor([0, 0, 0])]; tensor concat_239 = const()[name = string("concat_239"), val = tensor([0, 1500, 0])]; tensor k_107_internal_tensor_assign_1_stride_0 = const()[name = string("k_107_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_107_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_107_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_107_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_107_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_107_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_107_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_107_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_238, begin_mask = k_107_internal_tensor_assign_1_begin_mask_0, end = concat_239, end_mask = k_107_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_107_internal_tensor_assign_1_squeeze_mask_0, stride = k_107_internal_tensor_assign_1_stride_0, update = k_cache_43_cast_fp16, x = k_7_to_fp16)[name = string("k_107_internal_tensor_assign_1_cast_fp16")]; tensor concat_240 = const()[name = string("concat_240"), val = tensor([0, 0, 0])]; tensor concat_241 = const()[name = string("concat_241"), val = tensor([0, 1500, 0])]; tensor v_107_internal_tensor_assign_1_stride_0 = const()[name = string("v_107_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_107_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_107_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_107_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_107_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_107_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_107_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_107_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_240, begin_mask = v_107_internal_tensor_assign_1_begin_mask_0, end = concat_241, end_mask = v_107_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_107_internal_tensor_assign_1_squeeze_mask_0, stride = v_107_internal_tensor_assign_1_stride_0, update = v_cache_43_cast_fp16, x = k_7_to_fp16)[name = string("v_107_internal_tensor_assign_1_cast_fp16")]; tensor concat_242x = const()[name = string("concat_242x"), val = tensor([1, -1, 12, 64])]; tensor var_2347_cast_fp16 = reshape(shape = concat_242x, x = linear_84_cast_fp16)[name = string("op_2347_cast_fp16")]; tensor const_102_to_fp16 = const()[name = string("const_102_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_87_cast_fp16 = mul(x = var_2347_cast_fp16, y = const_102_to_fp16)[name = string("q_87_cast_fp16")]; tensor var_2353 = const()[name = string("op_2353"), val = tensor([1, 1500, 12, -1])]; tensor var_2354_cast_fp16 = reshape(shape = var_2353, x = k_107_internal_tensor_assign_1_cast_fp16)[name = string("op_2354_cast_fp16")]; tensor const_103_to_fp16 = const()[name = string("const_103_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_109_cast_fp16 = mul(x = var_2354_cast_fp16, y = const_103_to_fp16)[name = string("k_109_cast_fp16")]; tensor var_2360 = const()[name = string("op_2360"), val = tensor([1, 1500, 12, -1])]; tensor var_2361_cast_fp16 = reshape(shape = var_2360, x = v_107_internal_tensor_assign_1_cast_fp16)[name = string("op_2361_cast_fp16")]; tensor var_2362 = const()[name = string("op_2362"), val = tensor([0, 2, 1, 3])]; bool qk_65_transpose_x_0 = const()[name = string("qk_65_transpose_x_0"), val = bool(false)]; bool qk_65_transpose_y_0 = const()[name = string("qk_65_transpose_y_0"), val = bool(false)]; tensor transpose_139_perm_0 = const()[name = string("transpose_139_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_140_perm_0 = const()[name = string("transpose_140_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_140 = transpose(perm = transpose_140_perm_0, x = k_109_cast_fp16)[name = string("transpose_154")]; tensor transpose_139 = transpose(perm = transpose_139_perm_0, x = q_87_cast_fp16)[name = string("transpose_155")]; tensor qk_65_cast_fp16 = matmul(transpose_x = qk_65_transpose_x_0, transpose_y = qk_65_transpose_y_0, x = transpose_139, y = transpose_140)[name = string("qk_65_cast_fp16")]; tensor var_2366_cast_fp16 = softmax(axis = var_2210, x = qk_65_cast_fp16)[name = string("op_2366_cast_fp16")]; bool var_2368_transpose_x_0 = const()[name = string("op_2368_transpose_x_0"), val = bool(false)]; bool var_2368_transpose_y_0 = const()[name = string("op_2368_transpose_y_0"), val = bool(false)]; tensor v_109_cast_fp16 = transpose(perm = var_2362, x = var_2361_cast_fp16)[name = string("transpose_156")]; tensor var_2368_cast_fp16 = matmul(transpose_x = var_2368_transpose_x_0, transpose_y = var_2368_transpose_y_0, x = var_2366_cast_fp16, y = v_109_cast_fp16)[name = string("op_2368_cast_fp16")]; tensor var_2369 = const()[name = string("op_2369"), val = tensor([0, 2, 1, 3])]; tensor concat_243x = const()[name = string("concat_243x"), val = tensor([1, -1, 768])]; tensor var_2370_cast_fp16 = transpose(perm = var_2369, x = var_2368_cast_fp16)[name = string("transpose_153")]; tensor x_193_cast_fp16 = reshape(shape = concat_243x, x = var_2370_cast_fp16)[name = string("x_193_cast_fp16")]; tensor var_2374_to_fp16 = const()[name = string("op_2374_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(254381376)))]; tensor var_2375_to_fp16 = const()[name = string("op_2375_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(255561088)))]; tensor linear_85_cast_fp16 = linear(bias = var_2375_to_fp16, weight = var_2374_to_fp16, x = x_193_cast_fp16)[name = string("linear_85_cast_fp16")]; tensor x_195_cast_fp16 = add(x = x_189_cast_fp16, y = linear_85_cast_fp16)[name = string("x_195_cast_fp16")]; tensor var_2382_axes_0 = const()[name = string("op_2382_axes_0"), val = tensor([-1])]; tensor blocks_10_mlp_ln_weight_to_fp16 = const()[name = string("blocks_10_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(255562688)))]; tensor blocks_10_mlp_ln_bias_to_fp16 = const()[name = string("blocks_10_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(255564288)))]; tensor var_2382_cast_fp16 = layer_norm(axes = var_2382_axes_0, beta = blocks_10_mlp_ln_bias_to_fp16, epsilon = var_2216_to_fp16, gamma = blocks_10_mlp_ln_weight_to_fp16, x = x_195_cast_fp16)[name = string("op_2382_cast_fp16")]; tensor var_2391_to_fp16 = const()[name = string("op_2391_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(255565888)))]; tensor var_2392_to_fp16 = const()[name = string("op_2392_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(260284544)))]; tensor linear_86_cast_fp16 = linear(bias = var_2392_to_fp16, weight = var_2391_to_fp16, x = var_2382_cast_fp16)[name = string("linear_86_cast_fp16")]; string x_199_mode_0 = const()[name = string("x_199_mode_0"), val = string("EXACT")]; tensor x_199_cast_fp16 = gelu(mode = x_199_mode_0, x = linear_86_cast_fp16)[name = string("x_199_cast_fp16")]; tensor var_2397_to_fp16 = const()[name = string("op_2397_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(260290752)))]; tensor var_2398_to_fp16 = const()[name = string("op_2398_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265009408)))]; tensor linear_87_cast_fp16 = linear(bias = var_2398_to_fp16, weight = var_2397_to_fp16, x = x_199_cast_fp16)[name = string("linear_87_cast_fp16")]; tensor x_201_cast_fp16 = add(x = x_195_cast_fp16, y = linear_87_cast_fp16)[name = string("x_201_cast_fp16")]; tensor k_cache_45_begin_0 = const()[name = string("k_cache_45_begin_0"), val = tensor([11, 0, 0, 0])]; tensor k_cache_45_end_0 = const()[name = string("k_cache_45_end_0"), val = tensor([12, 1, 448, 768])]; tensor k_cache_45_end_mask_0 = const()[name = string("k_cache_45_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_45_squeeze_mask_0 = const()[name = string("k_cache_45_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_45_cast_fp16 = slice_by_index(begin = k_cache_45_begin_0, end = k_cache_45_end_0, end_mask = k_cache_45_end_mask_0, squeeze_mask = k_cache_45_squeeze_mask_0, x = coreml_update_state_44)[name = string("k_cache_45_cast_fp16")]; tensor v_cache_45_begin_0 = const()[name = string("v_cache_45_begin_0"), val = tensor([11, 0, 0, 0])]; tensor v_cache_45_end_0 = const()[name = string("v_cache_45_end_0"), val = tensor([12, 1, 448, 768])]; tensor v_cache_45_end_mask_0 = const()[name = string("v_cache_45_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_45_squeeze_mask_0 = const()[name = string("v_cache_45_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_45_cast_fp16 = slice_by_index(begin = v_cache_45_begin_0, end = v_cache_45_end_0, end_mask = v_cache_45_end_mask_0, squeeze_mask = v_cache_45_squeeze_mask_0, x = coreml_update_state_45)[name = string("v_cache_45_cast_fp16")]; tensor k_cache_begin_0 = const()[name = string("k_cache_begin_0"), val = tensor([11, 0, 0, 0])]; tensor k_cache_end_0 = const()[name = string("k_cache_end_0"), val = tensor([12, 1, 1500, 768])]; tensor k_cache_end_mask_0 = const()[name = string("k_cache_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_squeeze_mask_0 = const()[name = string("k_cache_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_cast_fp16 = slice_by_index(begin = k_cache_begin_0, end = k_cache_end_0, end_mask = k_cache_end_mask_0, squeeze_mask = k_cache_squeeze_mask_0, x = read_state_2)[name = string("k_cache_cast_fp16")]; tensor v_cache_begin_0 = const()[name = string("v_cache_begin_0"), val = tensor([11, 0, 0, 0])]; tensor v_cache_end_0 = const()[name = string("v_cache_end_0"), val = tensor([12, 1, 1500, 768])]; tensor v_cache_end_mask_0 = const()[name = string("v_cache_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_squeeze_mask_0 = const()[name = string("v_cache_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_cast_fp16 = slice_by_index(begin = v_cache_begin_0, end = v_cache_end_0, end_mask = v_cache_end_mask_0, squeeze_mask = v_cache_squeeze_mask_0, x = read_state_3)[name = string("v_cache_cast_fp16")]; int32 var_2421 = const()[name = string("op_2421"), val = int32(-1)]; tensor var_2439_axes_0 = const()[name = string("op_2439_axes_0"), val = tensor([-1])]; tensor blocks_11_attn_ln_weight_to_fp16 = const()[name = string("blocks_11_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265011008)))]; tensor blocks_11_attn_ln_bias_to_fp16 = const()[name = string("blocks_11_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265012608)))]; fp16 var_2427_to_fp16 = const()[name = string("op_2427_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_2439_cast_fp16 = layer_norm(axes = var_2439_axes_0, beta = blocks_11_attn_ln_bias_to_fp16, epsilon = var_2427_to_fp16, gamma = blocks_11_attn_ln_weight_to_fp16, x = x_201_cast_fp16)[name = string("op_2439_cast_fp16")]; tensor var_2450_to_fp16 = const()[name = string("op_2450_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265014208)))]; tensor var_2451_to_fp16 = const()[name = string("op_2451_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(266193920)))]; tensor linear_88_cast_fp16 = linear(bias = var_2451_to_fp16, weight = var_2450_to_fp16, x = var_2439_cast_fp16)[name = string("linear_88_cast_fp16")]; tensor var_2454_to_fp16 = const()[name = string("op_2454_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(266195520)))]; tensor linear_89_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2454_to_fp16, x = var_2439_cast_fp16)[name = string("linear_89_cast_fp16")]; tensor var_2458_to_fp16 = const()[name = string("op_2458_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267375232)))]; tensor var_2459_to_fp16 = const()[name = string("op_2459_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(268554944)))]; tensor linear_90_cast_fp16 = linear(bias = var_2459_to_fp16, weight = var_2458_to_fp16, x = var_2439_cast_fp16)[name = string("linear_90_cast_fp16")]; tensor var_2461_shape_cast_fp16 = shape(x = linear_88_cast_fp16)[name = string("op_2461_shape_cast_fp16")]; int32 gather_134_axis_0 = const()[name = string("gather_134_axis_0"), val = int32(0)]; int32 gather_134_batch_dims_0 = const()[name = string("gather_134_batch_dims_0"), val = int32(0)]; bool gather_134_validate_indices_0 = const()[name = string("gather_134_validate_indices_0"), val = bool(false)]; string var_2461_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2461_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_134_to_uint16 = const()[name = string("select_134_to_uint16"), val = uint16(1)]; tensor var_2461_shape_cast_fp16_to_uint16 = cast(dtype = var_2461_shape_cast_fp16_to_uint16_dtype_0, x = var_2461_shape_cast_fp16)[name = string("cast_128")]; uint16 gather_134_cast_uint16 = gather(axis = gather_134_axis_0, batch_dims = gather_134_batch_dims_0, indices = select_134_to_uint16, validate_indices = gather_134_validate_indices_0, x = var_2461_shape_cast_fp16_to_uint16)[name = string("gather_134_cast_uint16")]; string gather_134_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_134_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_134_cast_uint16_to_int32 = cast(dtype = gather_134_cast_uint16_to_int32_dtype_0, x = gather_134_cast_uint16)[name = string("cast_127")]; int32 end_step = add(x = offset, y = gather_134_cast_uint16_to_int32)[name = string("end_step")]; tensor expand_dims_176 = const()[name = string("expand_dims_176"), val = tensor([0])]; tensor expand_dims_178 = const()[name = string("expand_dims_178"), val = tensor([0])]; tensor expand_dims_179_axes_0 = const()[name = string("expand_dims_179_axes_0"), val = tensor([0])]; tensor expand_dims_179 = expand_dims(axes = expand_dims_179_axes_0, x = end_step)[name = string("expand_dims_179")]; tensor concat_246_values0_0 = const()[name = string("concat_246_values0_0"), val = tensor([11])]; int32 concat_246_axis_0 = const()[name = string("concat_246_axis_0"), val = int32(0)]; bool concat_246_interleave_0 = const()[name = string("concat_246_interleave_0"), val = bool(false)]; tensor concat_246 = concat(axis = concat_246_axis_0, interleave = concat_246_interleave_0, values = (concat_246_values0_0, expand_dims_176, expand_dims_1, expand_dims_178))[name = string("concat_246")]; tensor concat_247_values0_0 = const()[name = string("concat_247_values0_0"), val = tensor([0])]; tensor concat_247_values1_0 = const()[name = string("concat_247_values1_0"), val = tensor([0])]; tensor concat_247_values3_0 = const()[name = string("concat_247_values3_0"), val = tensor([0])]; int32 concat_247_axis_0 = const()[name = string("concat_247_axis_0"), val = int32(0)]; bool concat_247_interleave_0 = const()[name = string("concat_247_interleave_0"), val = bool(false)]; tensor concat_247 = concat(axis = concat_247_axis_0, interleave = concat_247_interleave_0, values = (concat_247_values0_0, concat_247_values1_0, expand_dims_179, concat_247_values3_0))[name = string("concat_247")]; tensor k_cache1_internal_tensor_assign_12_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_12_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_12_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_246, begin_mask = k_cache1_internal_tensor_assign_12_begin_mask_0, end = concat_247, end_mask = k_cache1_internal_tensor_assign_12_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_12_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_12_stride_0, update = linear_89_cast_fp16, x = coreml_update_state_44)[name = string("k_cache1_internal_tensor_assign_12_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_12_cast_fp16, input = k_cache1)[name = string("coreml_update_state_46_write_state")]; tensor v_cache1_internal_tensor_assign_12_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_12_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_12_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_246, begin_mask = v_cache1_internal_tensor_assign_12_begin_mask_0, end = concat_247, end_mask = v_cache1_internal_tensor_assign_12_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_12_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_12_stride_0, update = linear_90_cast_fp16, x = coreml_update_state_45)[name = string("v_cache1_internal_tensor_assign_12_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_12_cast_fp16, input = v_cache1)[name = string("coreml_update_state_47_write_state")]; int32 concat_252_values0_0 = const()[name = string("concat_252_values0_0"), val = int32(1)]; int32 concat_252_values2_0 = const()[name = string("concat_252_values2_0"), val = int32(768)]; int32 concat_252_axis_0 = const()[name = string("concat_252_axis_0"), val = int32(0)]; bool concat_252_interleave_0 = const()[name = string("concat_252_interleave_0"), val = bool(false)]; tensor concat_252 = concat(axis = concat_252_axis_0, interleave = concat_252_interleave_0, values = (concat_252_values0_0, end_step, concat_252_values2_0))[name = string("concat_252")]; tensor var_2477_begin_0 = const()[name = string("op_2477_begin_0"), val = tensor([0, 0, 0])]; tensor var_2477_end_mask_0 = const()[name = string("op_2477_end_mask_0"), val = tensor([true, false, true])]; tensor var_2477_cast_fp16 = slice_by_index(begin = var_2477_begin_0, end = concat_252, end_mask = var_2477_end_mask_0, x = k_cache_45_cast_fp16)[name = string("op_2477_cast_fp16")]; tensor var_2480_begin_0 = const()[name = string("op_2480_begin_0"), val = tensor([0, 0, 0])]; tensor var_2480_end_mask_0 = const()[name = string("op_2480_end_mask_0"), val = tensor([true, false, true])]; tensor var_2480_cast_fp16 = slice_by_index(begin = var_2480_begin_0, end = concat_252, end_mask = var_2480_end_mask_0, x = v_cache_45_cast_fp16)[name = string("op_2480_cast_fp16")]; tensor concat_254x = const()[name = string("concat_254x"), val = tensor([1, -1, 12, 64])]; tensor var_2490_cast_fp16 = reshape(shape = concat_254x, x = linear_88_cast_fp16)[name = string("op_2490_cast_fp16")]; tensor const_104_to_fp16 = const()[name = string("const_104_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_91_cast_fp16 = mul(x = var_2490_cast_fp16, y = const_104_to_fp16)[name = string("q_91_cast_fp16")]; tensor concat_255x = const()[name = string("concat_255x"), val = tensor([1, -1, 12, 64])]; tensor var_2497_cast_fp16 = reshape(shape = concat_255x, x = var_2477_cast_fp16)[name = string("op_2497_cast_fp16")]; tensor const_105_to_fp16 = const()[name = string("const_105_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_115_cast_fp16 = mul(x = var_2497_cast_fp16, y = const_105_to_fp16)[name = string("k_115_cast_fp16")]; tensor concat_256x = const()[name = string("concat_256x"), val = tensor([1, -1, 12, 64])]; tensor var_2504_cast_fp16 = reshape(shape = concat_256x, x = var_2480_cast_fp16)[name = string("op_2504_cast_fp16")]; tensor var_2505 = const()[name = string("op_2505"), val = tensor([0, 2, 1, 3])]; bool qk_67_transpose_x_0 = const()[name = string("qk_67_transpose_x_0"), val = bool(false)]; bool qk_67_transpose_y_0 = const()[name = string("qk_67_transpose_y_0"), val = bool(false)]; tensor transpose_141_perm_0 = const()[name = string("transpose_141_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_142_perm_0 = const()[name = string("transpose_142_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_142 = transpose(perm = transpose_142_perm_0, x = k_115_cast_fp16)[name = string("transpose_150")]; tensor transpose_141 = transpose(perm = transpose_141_perm_0, x = q_91_cast_fp16)[name = string("transpose_151")]; tensor qk_67_cast_fp16 = matmul(transpose_x = qk_67_transpose_x_0, transpose_y = qk_67_transpose_y_0, x = transpose_141, y = transpose_142)[name = string("qk_67_cast_fp16")]; int32 concat_257_values1_0 = const()[name = string("concat_257_values1_0"), val = int32(448)]; int32 concat_257_axis_0 = const()[name = string("concat_257_axis_0"), val = int32(0)]; bool concat_257_interleave_0 = const()[name = string("concat_257_interleave_0"), val = bool(false)]; tensor concat_257 = concat(axis = concat_257_axis_0, interleave = concat_257_interleave_0, values = (gather_134_cast_uint16_to_int32, concat_257_values1_0))[name = string("concat_257")]; tensor var_2508_begin_0 = const()[name = string("op_2508_begin_0"), val = tensor([0, 0])]; tensor var_2508_end_mask_0 = const()[name = string("op_2508_end_mask_0"), val = tensor([false, true])]; tensor var_2508_cast_fp16 = slice_by_index(begin = var_2508_begin_0, end = concat_257, end_mask = var_2508_end_mask_0, x = mask_to_fp16)[name = string("op_2508_cast_fp16")]; int32 concat_258_values0_0 = const()[name = string("concat_258_values0_0"), val = int32(0)]; int32 concat_258_axis_0 = const()[name = string("concat_258_axis_0"), val = int32(0)]; bool concat_258_interleave_0 = const()[name = string("concat_258_interleave_0"), val = bool(false)]; tensor concat_258 = concat(axis = concat_258_axis_0, interleave = concat_258_interleave_0, values = (concat_258_values0_0, gather_134_cast_uint16_to_int32))[name = string("concat_258")]; tensor var_2509_begin_0 = const()[name = string("op_2509_begin_0"), val = tensor([0, 0])]; tensor var_2509_end_mask_0 = const()[name = string("op_2509_end_mask_0"), val = tensor([true, false])]; tensor var_2509_cast_fp16 = slice_by_index(begin = var_2509_begin_0, end = concat_258, end_mask = var_2509_end_mask_0, x = var_2508_cast_fp16)[name = string("op_2509_cast_fp16")]; tensor qk_69_cast_fp16 = add(x = qk_67_cast_fp16, y = var_2509_cast_fp16)[name = string("qk_69_cast_fp16")]; tensor var_2512_cast_fp16 = softmax(axis = var_2421, x = qk_69_cast_fp16)[name = string("op_2512_cast_fp16")]; bool var_2514_transpose_x_0 = const()[name = string("op_2514_transpose_x_0"), val = bool(false)]; bool var_2514_transpose_y_0 = const()[name = string("op_2514_transpose_y_0"), val = bool(false)]; tensor v_115_cast_fp16 = transpose(perm = var_2505, x = var_2504_cast_fp16)[name = string("transpose_152")]; tensor var_2514_cast_fp16 = matmul(transpose_x = var_2514_transpose_x_0, transpose_y = var_2514_transpose_y_0, x = var_2512_cast_fp16, y = v_115_cast_fp16)[name = string("op_2514_cast_fp16")]; tensor var_2515 = const()[name = string("op_2515"), val = tensor([0, 2, 1, 3])]; tensor concat_259x = const()[name = string("concat_259x"), val = tensor([1, -1, 768])]; tensor var_2516_cast_fp16 = transpose(perm = var_2515, x = var_2514_cast_fp16)[name = string("transpose_149")]; tensor x_205_cast_fp16 = reshape(shape = concat_259x, x = var_2516_cast_fp16)[name = string("x_205_cast_fp16")]; tensor var_2520_to_fp16 = const()[name = string("op_2520_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(268556544)))]; tensor var_2521_to_fp16 = const()[name = string("op_2521_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269736256)))]; tensor linear_91_cast_fp16 = linear(bias = var_2521_to_fp16, weight = var_2520_to_fp16, x = x_205_cast_fp16)[name = string("linear_91_cast_fp16")]; tensor x_207_cast_fp16 = add(x = x_201_cast_fp16, y = linear_91_cast_fp16)[name = string("x_207_cast_fp16")]; tensor var_2528_axes_0 = const()[name = string("op_2528_axes_0"), val = tensor([-1])]; tensor blocks_11_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_11_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269737856)))]; tensor blocks_11_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_11_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269739456)))]; tensor var_2528_cast_fp16 = layer_norm(axes = var_2528_axes_0, beta = blocks_11_cross_attn_ln_bias_to_fp16, epsilon = var_2427_to_fp16, gamma = blocks_11_cross_attn_ln_weight_to_fp16, x = x_207_cast_fp16)[name = string("op_2528_cast_fp16")]; tensor var_2537_to_fp16 = const()[name = string("op_2537_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269741056)))]; tensor var_2538_to_fp16 = const()[name = string("op_2538_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270920768)))]; tensor linear_92_cast_fp16 = linear(bias = var_2538_to_fp16, weight = var_2537_to_fp16, x = var_2528_cast_fp16)[name = string("linear_92_cast_fp16")]; tensor concat_260 = const()[name = string("concat_260"), val = tensor([0, 0, 0])]; tensor concat_261 = const()[name = string("concat_261"), val = tensor([0, 1500, 0])]; tensor k_117_internal_tensor_assign_1_stride_0 = const()[name = string("k_117_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_117_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_117_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_117_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_117_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_117_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_117_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_117_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_260, begin_mask = k_117_internal_tensor_assign_1_begin_mask_0, end = concat_261, end_mask = k_117_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_117_internal_tensor_assign_1_squeeze_mask_0, stride = k_117_internal_tensor_assign_1_stride_0, update = k_cache_cast_fp16, x = k_7_to_fp16)[name = string("k_117_internal_tensor_assign_1_cast_fp16")]; tensor concat_262 = const()[name = string("concat_262"), val = tensor([0, 0, 0])]; tensor concat_263 = const()[name = string("concat_263"), val = tensor([0, 1500, 0])]; tensor v_117_internal_tensor_assign_1_stride_0 = const()[name = string("v_117_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_117_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_117_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_117_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_117_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_117_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_117_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_117_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_262, begin_mask = v_117_internal_tensor_assign_1_begin_mask_0, end = concat_263, end_mask = v_117_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_117_internal_tensor_assign_1_squeeze_mask_0, stride = v_117_internal_tensor_assign_1_stride_0, update = v_cache_cast_fp16, x = k_7_to_fp16)[name = string("v_117_internal_tensor_assign_1_cast_fp16")]; tensor concat_264x = const()[name = string("concat_264x"), val = tensor([1, -1, 12, 64])]; tensor var_2558_cast_fp16 = reshape(shape = concat_264x, x = linear_92_cast_fp16)[name = string("op_2558_cast_fp16")]; tensor const_106_to_fp16 = const()[name = string("const_106_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_cast_fp16 = mul(x = var_2558_cast_fp16, y = const_106_to_fp16)[name = string("q_cast_fp16")]; tensor var_2564 = const()[name = string("op_2564"), val = tensor([1, 1500, 12, -1])]; tensor var_2565_cast_fp16 = reshape(shape = var_2564, x = k_117_internal_tensor_assign_1_cast_fp16)[name = string("op_2565_cast_fp16")]; tensor const_107_to_fp16 = const()[name = string("const_107_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_cast_fp16 = mul(x = var_2565_cast_fp16, y = const_107_to_fp16)[name = string("k_cast_fp16")]; tensor var_2571 = const()[name = string("op_2571"), val = tensor([1, 1500, 12, -1])]; tensor var_2572_cast_fp16 = reshape(shape = var_2571, x = v_117_internal_tensor_assign_1_cast_fp16)[name = string("op_2572_cast_fp16")]; tensor var_2573 = const()[name = string("op_2573"), val = tensor([0, 2, 1, 3])]; bool qk_transpose_x_0 = const()[name = string("qk_transpose_x_0"), val = bool(false)]; bool qk_transpose_y_0 = const()[name = string("qk_transpose_y_0"), val = bool(false)]; tensor transpose_143_perm_0 = const()[name = string("transpose_143_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_144_perm_0 = const()[name = string("transpose_144_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_144 = transpose(perm = transpose_144_perm_0, x = k_cast_fp16)[name = string("transpose_146")]; tensor transpose_143 = transpose(perm = transpose_143_perm_0, x = q_cast_fp16)[name = string("transpose_147")]; tensor qk_cast_fp16 = matmul(transpose_x = qk_transpose_x_0, transpose_y = qk_transpose_y_0, x = transpose_143, y = transpose_144)[name = string("qk_cast_fp16")]; tensor var_2577_cast_fp16 = softmax(axis = var_2421, x = qk_cast_fp16)[name = string("op_2577_cast_fp16")]; bool var_2579_transpose_x_0 = const()[name = string("op_2579_transpose_x_0"), val = bool(false)]; bool var_2579_transpose_y_0 = const()[name = string("op_2579_transpose_y_0"), val = bool(false)]; tensor v_cast_fp16 = transpose(perm = var_2573, x = var_2572_cast_fp16)[name = string("transpose_148")]; tensor var_2579_cast_fp16 = matmul(transpose_x = var_2579_transpose_x_0, transpose_y = var_2579_transpose_y_0, x = var_2577_cast_fp16, y = v_cast_fp16)[name = string("op_2579_cast_fp16")]; tensor var_2580 = const()[name = string("op_2580"), val = tensor([0, 2, 1, 3])]; tensor concat_265x = const()[name = string("concat_265x"), val = tensor([1, -1, 768])]; tensor var_2581_cast_fp16 = transpose(perm = var_2580, x = var_2579_cast_fp16)[name = string("transpose_145")]; tensor x_211_cast_fp16 = reshape(shape = concat_265x, x = var_2581_cast_fp16)[name = string("x_211_cast_fp16")]; tensor var_2585_to_fp16 = const()[name = string("op_2585_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270922368)))]; tensor var_2586_to_fp16 = const()[name = string("op_2586_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272102080)))]; tensor linear_93_cast_fp16 = linear(bias = var_2586_to_fp16, weight = var_2585_to_fp16, x = x_211_cast_fp16)[name = string("linear_93_cast_fp16")]; tensor x_213_cast_fp16 = add(x = x_207_cast_fp16, y = linear_93_cast_fp16)[name = string("x_213_cast_fp16")]; tensor var_2593_axes_0 = const()[name = string("op_2593_axes_0"), val = tensor([-1])]; tensor blocks_11_mlp_ln_weight_to_fp16 = const()[name = string("blocks_11_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272103680)))]; tensor blocks_11_mlp_ln_bias_to_fp16 = const()[name = string("blocks_11_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272105280)))]; tensor var_2593_cast_fp16 = layer_norm(axes = var_2593_axes_0, beta = blocks_11_mlp_ln_bias_to_fp16, epsilon = var_2427_to_fp16, gamma = blocks_11_mlp_ln_weight_to_fp16, x = x_213_cast_fp16)[name = string("op_2593_cast_fp16")]; tensor var_2602_to_fp16 = const()[name = string("op_2602_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272106880)))]; tensor var_2603_to_fp16 = const()[name = string("op_2603_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(276825536)))]; tensor linear_94_cast_fp16 = linear(bias = var_2603_to_fp16, weight = var_2602_to_fp16, x = var_2593_cast_fp16)[name = string("linear_94_cast_fp16")]; string x_217_mode_0 = const()[name = string("x_217_mode_0"), val = string("EXACT")]; tensor x_217_cast_fp16 = gelu(mode = x_217_mode_0, x = linear_94_cast_fp16)[name = string("x_217_cast_fp16")]; tensor var_2608_to_fp16 = const()[name = string("op_2608_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(276831744)))]; tensor var_2609_to_fp16 = const()[name = string("op_2609_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(281550400)))]; tensor linear_95_cast_fp16 = linear(bias = var_2609_to_fp16, weight = var_2608_to_fp16, x = x_217_cast_fp16)[name = string("linear_95_cast_fp16")]; tensor x_219_cast_fp16 = add(x = x_213_cast_fp16, y = linear_95_cast_fp16)[name = string("x_219_cast_fp16")]; tensor var_2622_axes_0 = const()[name = string("op_2622_axes_0"), val = tensor([-1])]; tensor ln_weight_to_fp16 = const()[name = string("ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(281552000)))]; tensor ln_bias_to_fp16 = const()[name = string("ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(281553600)))]; fp16 var_2613_to_fp16 = const()[name = string("op_2613_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_2622_cast_fp16 = layer_norm(axes = var_2622_axes_0, beta = ln_bias_to_fp16, epsilon = var_2613_to_fp16, gamma = ln_weight_to_fp16, x = x_219_cast_fp16)[name = string("op_2622_cast_fp16")]; tensor var_2632_bias_0_to_fp16 = const()[name = string("op_2632_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(281555200)))]; tensor logits = linear(bias = var_2632_bias_0_to_fp16, weight = token_embedding_weight_to_fp16, x = var_2622_cast_fp16)[name = string("op_2632_cast_fp16")]; } -> (logits); }