program(1.3) [buildInfo = dict({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}, {"coremltools-component-torch", "2.4.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0"}})] { func main(state> k_cache1, state> k_cache2, tensor offset_mask, tensor token_data, state> v_cache1, state> v_cache2) [FlexibleShapeInformation = tuple>>, tuple, ?>>>>((("DefaultShapes", {{"offset_mask", [1, 1]}, {"token_data", [1, 1]}}), ("RangeDims", {{"offset_mask", [[1, 1], [1, 448]]}, {"token_data", [[1, 1], [1, 448]]}})))] { tensor var_26_shape_cast_fp16 = shape(x = offset_mask)[name = string("op_26_shape_cast_fp16")]; int32 gather_0_axis_0 = const()[name = string("gather_0_axis_0"), val = int32(0)]; int32 gather_0_batch_dims_0 = const()[name = string("gather_0_batch_dims_0"), val = int32(0)]; bool gather_0_validate_indices_0 = const()[name = string("gather_0_validate_indices_0"), val = bool(false)]; string var_26_shape_cast_fp16_to_int16_dtype_0 = const()[name = string("op_26_shape_cast_fp16_to_int16_dtype_0"), val = string("int16")]; uint16 select_0_to_uint16 = const()[name = string("select_0_to_uint16"), val = uint16(1)]; tensor var_26_shape_cast_fp16_to_int16 = cast(dtype = var_26_shape_cast_fp16_to_int16_dtype_0, x = var_26_shape_cast_fp16)[name = string("cast_82")]; int16 gather_0_cast_uint16 = gather(axis = gather_0_axis_0, batch_dims = gather_0_batch_dims_0, indices = select_0_to_uint16, validate_indices = gather_0_validate_indices_0, x = var_26_shape_cast_fp16_to_int16)[name = string("gather_0_cast_uint16")]; string gather_0_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_0_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor var_30_shape = shape(x = token_data)[name = string("op_30_shape")]; int32 gather_1_axis_0 = const()[name = string("gather_1_axis_0"), val = int32(0)]; int32 gather_1_batch_dims_0 = const()[name = string("gather_1_batch_dims_0"), val = int32(0)]; bool gather_1_validate_indices_0 = const()[name = string("gather_1_validate_indices_0"), val = bool(false)]; string var_30_shape_to_uint16_dtype_0 = const()[name = string("op_30_shape_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_1_to_uint16 = const()[name = string("select_1_to_uint16"), val = uint16(1)]; tensor var_30_shape_to_uint16 = cast(dtype = var_30_shape_to_uint16_dtype_0, x = var_30_shape)[name = string("cast_80")]; uint16 gather_1_cast_uint16 = gather(axis = gather_1_axis_0, batch_dims = gather_1_batch_dims_0, indices = select_1_to_uint16, validate_indices = gather_1_validate_indices_0, x = var_30_shape_to_uint16)[name = string("gather_1_cast_uint16")]; string gather_1_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_1_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_1_cast_uint16_to_int32 = cast(dtype = gather_1_cast_uint16_to_int32_dtype_0, x = gather_1_cast_uint16)[name = string("cast_79")]; int32 gather_0_cast_uint16_to_int32 = cast(dtype = gather_0_cast_uint16_to_int32_dtype_0, x = gather_0_cast_uint16)[name = string("cast_81")]; int32 offset = sub(x = gather_0_cast_uint16_to_int32, y = gather_1_cast_uint16_to_int32)[name = string("offset")]; int32 var_50_axis_0 = const()[name = string("op_50_axis_0"), val = int32(0)]; int32 var_50_batch_dims_0 = const()[name = string("op_50_batch_dims_0"), val = int32(0)]; bool var_50_validate_indices_0 = const()[name = string("op_50_validate_indices_0"), val = bool(false)]; tensor token_embedding_weight_to_fp16 = const()[name = string("token_embedding_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; tensor var_50_cast_fp16 = gather(axis = var_50_axis_0, batch_dims = var_50_batch_dims_0, indices = token_data, validate_indices = var_50_validate_indices_0, x = token_embedding_weight_to_fp16)[name = string("op_50_cast_fp16")]; int32 concat_0_values1_0 = const()[name = string("concat_0_values1_0"), val = int32(0)]; int32 concat_0_axis_0 = const()[name = string("concat_0_axis_0"), val = int32(0)]; bool concat_0_interleave_0 = const()[name = string("concat_0_interleave_0"), val = bool(false)]; tensor concat_0 = concat(axis = concat_0_axis_0, interleave = concat_0_interleave_0, values = (offset, concat_0_values1_0))[name = string("concat_0")]; int32 concat_1_values1_0 = const()[name = string("concat_1_values1_0"), val = int32(512)]; int32 concat_1_axis_0 = const()[name = string("concat_1_axis_0"), val = int32(0)]; bool concat_1_interleave_0 = const()[name = string("concat_1_interleave_0"), val = bool(false)]; tensor concat_1 = concat(axis = concat_1_axis_0, interleave = concat_1_interleave_0, values = (gather_0_cast_uint16_to_int32, concat_1_values1_0))[name = string("concat_1")]; tensor var_53_end_mask_0 = const()[name = string("op_53_end_mask_0"), val = tensor([false, true])]; tensor positional_embedding_to_fp16 = const()[name = string("positional_embedding_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53109888)))]; tensor var_53_cast_fp16 = slice_by_index(begin = concat_0, end = concat_1, end_mask = var_53_end_mask_0, x = positional_embedding_to_fp16)[name = string("op_53_cast_fp16")]; tensor x_3_cast_fp16 = add(x = var_50_cast_fp16, y = var_53_cast_fp16)[name = string("x_3_cast_fp16")]; tensor read_state_0 = read_state(input = k_cache1)[name = string("read_state_0")]; tensor k_cache_1_begin_0 = const()[name = string("k_cache_1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor k_cache_1_end_0 = const()[name = string("k_cache_1_end_0"), val = tensor([1, 1, 448, 512])]; tensor k_cache_1_end_mask_0 = const()[name = string("k_cache_1_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_1_squeeze_mask_0 = const()[name = string("k_cache_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_1_cast_fp16 = slice_by_index(begin = k_cache_1_begin_0, end = k_cache_1_end_0, end_mask = k_cache_1_end_mask_0, squeeze_mask = k_cache_1_squeeze_mask_0, x = read_state_0)[name = string("k_cache_1_cast_fp16")]; tensor read_state_1 = read_state(input = v_cache1)[name = string("read_state_1")]; tensor v_cache_1_begin_0 = const()[name = string("v_cache_1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor v_cache_1_end_0 = const()[name = string("v_cache_1_end_0"), val = tensor([1, 1, 448, 512])]; tensor v_cache_1_end_mask_0 = const()[name = string("v_cache_1_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_1_squeeze_mask_0 = const()[name = string("v_cache_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_1_cast_fp16 = slice_by_index(begin = v_cache_1_begin_0, end = v_cache_1_end_0, end_mask = v_cache_1_end_mask_0, squeeze_mask = v_cache_1_squeeze_mask_0, x = read_state_1)[name = string("v_cache_1_cast_fp16")]; tensor read_state_2 = read_state(input = k_cache2)[name = string("read_state_2")]; tensor k_cache_3_begin_0 = const()[name = string("k_cache_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor k_cache_3_end_0 = const()[name = string("k_cache_3_end_0"), val = tensor([1, 1, 1500, 512])]; tensor k_cache_3_end_mask_0 = const()[name = string("k_cache_3_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_3_squeeze_mask_0 = const()[name = string("k_cache_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_3_cast_fp16 = slice_by_index(begin = k_cache_3_begin_0, end = k_cache_3_end_0, end_mask = k_cache_3_end_mask_0, squeeze_mask = k_cache_3_squeeze_mask_0, x = read_state_2)[name = string("k_cache_3_cast_fp16")]; tensor read_state_3 = read_state(input = v_cache2)[name = string("read_state_3")]; tensor v_cache_3_begin_0 = const()[name = string("v_cache_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor v_cache_3_end_0 = const()[name = string("v_cache_3_end_0"), val = tensor([1, 1, 1500, 512])]; tensor v_cache_3_end_mask_0 = const()[name = string("v_cache_3_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_3_squeeze_mask_0 = const()[name = string("v_cache_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_3_cast_fp16 = slice_by_index(begin = v_cache_3_begin_0, end = v_cache_3_end_0, end_mask = v_cache_3_end_mask_0, squeeze_mask = v_cache_3_squeeze_mask_0, x = read_state_3)[name = string("v_cache_3_cast_fp16")]; int32 var_76 = const()[name = string("op_76"), val = int32(-1)]; tensor var_94_axes_0 = const()[name = string("op_94_axes_0"), val = tensor([-1])]; tensor blocks_0_attn_ln_weight_to_fp16 = const()[name = string("blocks_0_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53568704)))]; tensor blocks_0_attn_ln_bias_to_fp16 = const()[name = string("blocks_0_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53569792)))]; fp16 var_82_to_fp16 = const()[name = string("op_82_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_94_cast_fp16 = layer_norm(axes = var_94_axes_0, beta = blocks_0_attn_ln_bias_to_fp16, epsilon = var_82_to_fp16, gamma = blocks_0_attn_ln_weight_to_fp16, x = x_3_cast_fp16)[name = string("op_94_cast_fp16")]; tensor var_105_to_fp16 = const()[name = string("op_105_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53570880)))]; tensor var_106_to_fp16 = const()[name = string("op_106_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54095232)))]; tensor linear_0_cast_fp16 = linear(bias = var_106_to_fp16, weight = var_105_to_fp16, x = var_94_cast_fp16)[name = string("linear_0_cast_fp16")]; tensor var_109_to_fp16 = const()[name = string("op_109_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54096320)))]; tensor linear_1_bias_0_to_fp16 = const()[name = string("linear_1_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54620672)))]; tensor linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_109_to_fp16, x = var_94_cast_fp16)[name = string("linear_1_cast_fp16")]; tensor var_113_to_fp16 = const()[name = string("op_113_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54621760)))]; tensor var_114_to_fp16 = const()[name = string("op_114_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55146112)))]; tensor linear_2_cast_fp16 = linear(bias = var_114_to_fp16, weight = var_113_to_fp16, x = var_94_cast_fp16)[name = string("linear_2_cast_fp16")]; tensor var_116_shape_cast_fp16 = shape(x = linear_0_cast_fp16)[name = string("op_116_shape_cast_fp16")]; int32 gather_2_axis_0 = const()[name = string("gather_2_axis_0"), val = int32(0)]; int32 gather_2_batch_dims_0 = const()[name = string("gather_2_batch_dims_0"), val = int32(0)]; bool gather_2_validate_indices_0 = const()[name = string("gather_2_validate_indices_0"), val = bool(false)]; string var_116_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_116_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_2_to_uint16 = const()[name = string("select_2_to_uint16"), val = uint16(1)]; tensor var_116_shape_cast_fp16_to_uint16 = cast(dtype = var_116_shape_cast_fp16_to_uint16_dtype_0, x = var_116_shape_cast_fp16)[name = string("cast_78")]; uint16 gather_2_cast_uint16 = gather(axis = gather_2_axis_0, batch_dims = gather_2_batch_dims_0, indices = select_2_to_uint16, validate_indices = gather_2_validate_indices_0, x = var_116_shape_cast_fp16_to_uint16)[name = string("gather_2_cast_uint16")]; string gather_2_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_2_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_2_cast_uint16_to_int32 = cast(dtype = gather_2_cast_uint16_to_int32_dtype_0, x = gather_2_cast_uint16)[name = string("cast_77")]; int32 end_step_3 = add(x = offset, y = gather_2_cast_uint16_to_int32)[name = string("end_step_3")]; tensor expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor([0])]; tensor expand_dims_1_axes_0 = const()[name = string("expand_dims_1_axes_0"), val = tensor([0])]; tensor expand_dims_1 = expand_dims(axes = expand_dims_1_axes_0, x = offset)[name = string("expand_dims_1")]; tensor expand_dims_2 = const()[name = string("expand_dims_2"), val = tensor([0])]; tensor expand_dims_3_axes_0 = const()[name = string("expand_dims_3_axes_0"), val = tensor([0])]; tensor expand_dims_3 = expand_dims(axes = expand_dims_3_axes_0, x = end_step_3)[name = string("expand_dims_3")]; tensor concat_4_values0_0 = const()[name = string("concat_4_values0_0"), val = tensor([0])]; int32 concat_4_axis_0 = const()[name = string("concat_4_axis_0"), val = int32(0)]; bool concat_4_interleave_0 = const()[name = string("concat_4_interleave_0"), val = bool(false)]; tensor concat_4 = concat(axis = concat_4_axis_0, interleave = concat_4_interleave_0, values = (concat_4_values0_0, expand_dims_0, expand_dims_1, expand_dims_2))[name = string("concat_4")]; tensor concat_5_values0_0 = const()[name = string("concat_5_values0_0"), val = tensor([0])]; tensor concat_5_values1_0 = const()[name = string("concat_5_values1_0"), val = tensor([0])]; tensor concat_5_values3_0 = const()[name = string("concat_5_values3_0"), val = tensor([0])]; int32 concat_5_axis_0 = const()[name = string("concat_5_axis_0"), val = int32(0)]; bool concat_5_interleave_0 = const()[name = string("concat_5_interleave_0"), val = bool(false)]; tensor concat_5 = concat(axis = concat_5_axis_0, interleave = concat_5_interleave_0, values = (concat_5_values0_0, concat_5_values1_0, expand_dims_3, concat_5_values3_0))[name = string("concat_5")]; tensor k_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_4, begin_mask = k_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_5, end_mask = k_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_1_stride_0, update = linear_1_cast_fp16, x = read_state_0)[name = string("k_cache1_internal_tensor_assign_1_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_1_cast_fp16, input = k_cache1)[name = string("coreml_update_state_12_write_state")]; tensor coreml_update_state_12 = read_state(input = k_cache1)[name = string("coreml_update_state_12")]; tensor v_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_4, begin_mask = v_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_5, end_mask = v_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_1_stride_0, update = linear_2_cast_fp16, x = read_state_1)[name = string("v_cache1_internal_tensor_assign_1_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_1_cast_fp16, input = v_cache1)[name = string("coreml_update_state_13_write_state")]; tensor coreml_update_state_13 = read_state(input = v_cache1)[name = string("coreml_update_state_13")]; int32 concat_10_values0_0 = const()[name = string("concat_10_values0_0"), val = int32(1)]; int32 concat_10_values2_0 = const()[name = string("concat_10_values2_0"), val = int32(512)]; int32 concat_10_axis_0 = const()[name = string("concat_10_axis_0"), val = int32(0)]; bool concat_10_interleave_0 = const()[name = string("concat_10_interleave_0"), val = bool(false)]; tensor concat_10 = concat(axis = concat_10_axis_0, interleave = concat_10_interleave_0, values = (concat_10_values0_0, end_step_3, concat_10_values2_0))[name = string("concat_10")]; tensor var_132_begin_0 = const()[name = string("op_132_begin_0"), val = tensor([0, 0, 0])]; tensor var_132_end_mask_0 = const()[name = string("op_132_end_mask_0"), val = tensor([true, false, true])]; tensor var_132_cast_fp16 = slice_by_index(begin = var_132_begin_0, end = concat_10, end_mask = var_132_end_mask_0, x = k_cache_1_cast_fp16)[name = string("op_132_cast_fp16")]; tensor var_135_begin_0 = const()[name = string("op_135_begin_0"), val = tensor([0, 0, 0])]; tensor var_135_end_mask_0 = const()[name = string("op_135_end_mask_0"), val = tensor([true, false, true])]; tensor var_135_cast_fp16 = slice_by_index(begin = var_135_begin_0, end = concat_10, end_mask = var_135_end_mask_0, x = v_cache_1_cast_fp16)[name = string("op_135_cast_fp16")]; tensor concat_12x = const()[name = string("concat_12x"), val = tensor([1, -1, 8, 64])]; tensor var_145_cast_fp16 = reshape(shape = concat_12x, x = linear_0_cast_fp16)[name = string("op_145_cast_fp16")]; tensor const_30_to_fp16 = const()[name = string("const_30_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_3_cast_fp16 = mul(x = var_145_cast_fp16, y = const_30_to_fp16)[name = string("q_3_cast_fp16")]; tensor concat_13x = const()[name = string("concat_13x"), val = tensor([1, -1, 8, 64])]; tensor var_152_cast_fp16 = reshape(shape = concat_13x, x = var_132_cast_fp16)[name = string("op_152_cast_fp16")]; tensor const_31_to_fp16 = const()[name = string("const_31_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_5_cast_fp16 = mul(x = var_152_cast_fp16, y = const_31_to_fp16)[name = string("k_5_cast_fp16")]; tensor concat_14x = const()[name = string("concat_14x"), val = tensor([1, -1, 8, 64])]; tensor var_159_cast_fp16 = reshape(shape = concat_14x, x = var_135_cast_fp16)[name = string("op_159_cast_fp16")]; tensor var_160 = const()[name = string("op_160"), val = tensor([0, 2, 1, 3])]; bool qk_1_transpose_x_0 = const()[name = string("qk_1_transpose_x_0"), val = bool(false)]; bool qk_1_transpose_y_0 = const()[name = string("qk_1_transpose_y_0"), val = bool(false)]; tensor transpose_49_perm_0 = const()[name = string("transpose_49_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_50_perm_0 = const()[name = string("transpose_50_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_50 = transpose(perm = transpose_50_perm_0, x = k_5_cast_fp16)[name = string("transpose_118")]; tensor transpose_49 = transpose(perm = transpose_49_perm_0, x = q_3_cast_fp16)[name = string("transpose_119")]; tensor qk_1_cast_fp16 = matmul(transpose_x = qk_1_transpose_x_0, transpose_y = qk_1_transpose_y_0, x = transpose_49, y = transpose_50)[name = string("qk_1_cast_fp16")]; int32 concat_15_values1_0 = const()[name = string("concat_15_values1_0"), val = int32(448)]; int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)]; bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)]; tensor concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (gather_2_cast_uint16_to_int32, concat_15_values1_0))[name = string("concat_15")]; tensor var_163_begin_0 = const()[name = string("op_163_begin_0"), val = tensor([0, 0])]; tensor var_163_end_mask_0 = const()[name = string("op_163_end_mask_0"), val = tensor([false, true])]; tensor mask_to_fp16 = const()[name = string("mask_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55147200)))]; tensor var_163_cast_fp16 = slice_by_index(begin = var_163_begin_0, end = concat_15, end_mask = var_163_end_mask_0, x = mask_to_fp16)[name = string("op_163_cast_fp16")]; int32 concat_16_values0_0 = const()[name = string("concat_16_values0_0"), val = int32(0)]; int32 concat_16_axis_0 = const()[name = string("concat_16_axis_0"), val = int32(0)]; bool concat_16_interleave_0 = const()[name = string("concat_16_interleave_0"), val = bool(false)]; tensor concat_16 = concat(axis = concat_16_axis_0, interleave = concat_16_interleave_0, values = (concat_16_values0_0, gather_2_cast_uint16_to_int32))[name = string("concat_16")]; tensor var_164_begin_0 = const()[name = string("op_164_begin_0"), val = tensor([0, 0])]; tensor var_164_end_mask_0 = const()[name = string("op_164_end_mask_0"), val = tensor([true, false])]; tensor var_164_cast_fp16 = slice_by_index(begin = var_164_begin_0, end = concat_16, end_mask = var_164_end_mask_0, x = var_163_cast_fp16)[name = string("op_164_cast_fp16")]; tensor qk_3_cast_fp16 = add(x = qk_1_cast_fp16, y = var_164_cast_fp16)[name = string("qk_3_cast_fp16")]; tensor var_167_cast_fp16 = softmax(axis = var_76, x = qk_3_cast_fp16)[name = string("op_167_cast_fp16")]; bool var_169_transpose_x_0 = const()[name = string("op_169_transpose_x_0"), val = bool(false)]; bool var_169_transpose_y_0 = const()[name = string("op_169_transpose_y_0"), val = bool(false)]; tensor v_5_cast_fp16 = transpose(perm = var_160, x = var_159_cast_fp16)[name = string("transpose_120")]; tensor var_169_cast_fp16 = matmul(transpose_x = var_169_transpose_x_0, transpose_y = var_169_transpose_y_0, x = var_167_cast_fp16, y = v_5_cast_fp16)[name = string("op_169_cast_fp16")]; tensor var_170 = const()[name = string("op_170"), val = tensor([0, 2, 1, 3])]; tensor concat_17x = const()[name = string("concat_17x"), val = tensor([1, -1, 512])]; tensor var_171_cast_fp16 = transpose(perm = var_170, x = var_169_cast_fp16)[name = string("transpose_117")]; tensor x_7_cast_fp16 = reshape(shape = concat_17x, x = var_171_cast_fp16)[name = string("x_7_cast_fp16")]; tensor var_175_to_fp16 = const()[name = string("op_175_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55548672)))]; tensor var_176_to_fp16 = const()[name = string("op_176_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56073024)))]; tensor linear_3_cast_fp16 = linear(bias = var_176_to_fp16, weight = var_175_to_fp16, x = x_7_cast_fp16)[name = string("linear_3_cast_fp16")]; tensor x_9_cast_fp16 = add(x = x_3_cast_fp16, y = linear_3_cast_fp16)[name = string("x_9_cast_fp16")]; tensor var_183_axes_0 = const()[name = string("op_183_axes_0"), val = tensor([-1])]; tensor blocks_0_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_0_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56074112)))]; tensor blocks_0_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_0_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56075200)))]; tensor var_183_cast_fp16 = layer_norm(axes = var_183_axes_0, beta = blocks_0_cross_attn_ln_bias_to_fp16, epsilon = var_82_to_fp16, gamma = blocks_0_cross_attn_ln_weight_to_fp16, x = x_9_cast_fp16)[name = string("op_183_cast_fp16")]; tensor var_192_to_fp16 = const()[name = string("op_192_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56076288)))]; tensor var_193_to_fp16 = const()[name = string("op_193_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56600640)))]; tensor linear_4_cast_fp16 = linear(bias = var_193_to_fp16, weight = var_192_to_fp16, x = var_183_cast_fp16)[name = string("linear_4_cast_fp16")]; tensor concat_18 = const()[name = string("concat_18"), val = tensor([0, 0, 0])]; tensor concat_19 = const()[name = string("concat_19"), val = tensor([0, 1500, 0])]; tensor k_7_internal_tensor_assign_1_stride_0 = const()[name = string("k_7_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_7_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_7_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_7_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_7_to_fp16 = const()[name = string("k_7_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56601728)))]; tensor k_7_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_18, begin_mask = k_7_internal_tensor_assign_1_begin_mask_0, end = concat_19, end_mask = k_7_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_7_internal_tensor_assign_1_squeeze_mask_0, stride = k_7_internal_tensor_assign_1_stride_0, update = k_cache_3_cast_fp16, x = k_7_to_fp16)[name = string("k_7_internal_tensor_assign_1_cast_fp16")]; tensor concat_20 = const()[name = string("concat_20"), val = tensor([0, 0, 0])]; tensor concat_21 = const()[name = string("concat_21"), val = tensor([0, 1500, 0])]; tensor v_7_internal_tensor_assign_1_stride_0 = const()[name = string("v_7_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_7_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_7_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_7_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_7_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_20, begin_mask = v_7_internal_tensor_assign_1_begin_mask_0, end = concat_21, end_mask = v_7_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_7_internal_tensor_assign_1_squeeze_mask_0, stride = v_7_internal_tensor_assign_1_stride_0, update = v_cache_3_cast_fp16, x = k_7_to_fp16)[name = string("v_7_internal_tensor_assign_1_cast_fp16")]; tensor concat_22x = const()[name = string("concat_22x"), val = tensor([1, -1, 8, 64])]; tensor var_213_cast_fp16 = reshape(shape = concat_22x, x = linear_4_cast_fp16)[name = string("op_213_cast_fp16")]; tensor const_32_to_fp16 = const()[name = string("const_32_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_7_cast_fp16 = mul(x = var_213_cast_fp16, y = const_32_to_fp16)[name = string("q_7_cast_fp16")]; tensor var_219 = const()[name = string("op_219"), val = tensor([1, 1500, 8, -1])]; tensor var_220_cast_fp16 = reshape(shape = var_219, x = k_7_internal_tensor_assign_1_cast_fp16)[name = string("op_220_cast_fp16")]; tensor const_33_to_fp16 = const()[name = string("const_33_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_9_cast_fp16 = mul(x = var_220_cast_fp16, y = const_33_to_fp16)[name = string("k_9_cast_fp16")]; tensor var_226 = const()[name = string("op_226"), val = tensor([1, 1500, 8, -1])]; tensor var_227_cast_fp16 = reshape(shape = var_226, x = v_7_internal_tensor_assign_1_cast_fp16)[name = string("op_227_cast_fp16")]; tensor var_228 = const()[name = string("op_228"), val = tensor([0, 2, 1, 3])]; bool qk_5_transpose_x_0 = const()[name = string("qk_5_transpose_x_0"), val = bool(false)]; bool qk_5_transpose_y_0 = const()[name = string("qk_5_transpose_y_0"), val = bool(false)]; tensor transpose_51_perm_0 = const()[name = string("transpose_51_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_52_perm_0 = const()[name = string("transpose_52_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_52 = transpose(perm = transpose_52_perm_0, x = k_9_cast_fp16)[name = string("transpose_114")]; tensor transpose_51 = transpose(perm = transpose_51_perm_0, x = q_7_cast_fp16)[name = string("transpose_115")]; tensor qk_5_cast_fp16 = matmul(transpose_x = qk_5_transpose_x_0, transpose_y = qk_5_transpose_y_0, x = transpose_51, y = transpose_52)[name = string("qk_5_cast_fp16")]; tensor var_232_cast_fp16 = softmax(axis = var_76, x = qk_5_cast_fp16)[name = string("op_232_cast_fp16")]; bool var_234_transpose_x_0 = const()[name = string("op_234_transpose_x_0"), val = bool(false)]; bool var_234_transpose_y_0 = const()[name = string("op_234_transpose_y_0"), val = bool(false)]; tensor v_9_cast_fp16 = transpose(perm = var_228, x = var_227_cast_fp16)[name = string("transpose_116")]; tensor var_234_cast_fp16 = matmul(transpose_x = var_234_transpose_x_0, transpose_y = var_234_transpose_y_0, x = var_232_cast_fp16, y = v_9_cast_fp16)[name = string("op_234_cast_fp16")]; tensor var_235 = const()[name = string("op_235"), val = tensor([0, 2, 1, 3])]; tensor concat_23x = const()[name = string("concat_23x"), val = tensor([1, -1, 512])]; tensor var_236_cast_fp16 = transpose(perm = var_235, x = var_234_cast_fp16)[name = string("transpose_113")]; tensor x_13_cast_fp16 = reshape(shape = concat_23x, x = var_236_cast_fp16)[name = string("x_13_cast_fp16")]; tensor var_240_to_fp16 = const()[name = string("op_240_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58137792)))]; tensor var_241_to_fp16 = const()[name = string("op_241_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58662144)))]; tensor linear_5_cast_fp16 = linear(bias = var_241_to_fp16, weight = var_240_to_fp16, x = x_13_cast_fp16)[name = string("linear_5_cast_fp16")]; tensor x_15_cast_fp16 = add(x = x_9_cast_fp16, y = linear_5_cast_fp16)[name = string("x_15_cast_fp16")]; tensor var_248_axes_0 = const()[name = string("op_248_axes_0"), val = tensor([-1])]; tensor blocks_0_mlp_ln_weight_to_fp16 = const()[name = string("blocks_0_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58663232)))]; tensor blocks_0_mlp_ln_bias_to_fp16 = const()[name = string("blocks_0_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58664320)))]; tensor var_248_cast_fp16 = layer_norm(axes = var_248_axes_0, beta = blocks_0_mlp_ln_bias_to_fp16, epsilon = var_82_to_fp16, gamma = blocks_0_mlp_ln_weight_to_fp16, x = x_15_cast_fp16)[name = string("op_248_cast_fp16")]; tensor var_257_to_fp16 = const()[name = string("op_257_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58665408)))]; tensor var_258_to_fp16 = const()[name = string("op_258_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60762624)))]; tensor linear_6_cast_fp16 = linear(bias = var_258_to_fp16, weight = var_257_to_fp16, x = var_248_cast_fp16)[name = string("linear_6_cast_fp16")]; string x_19_mode_0 = const()[name = string("x_19_mode_0"), val = string("EXACT")]; tensor x_19_cast_fp16 = gelu(mode = x_19_mode_0, x = linear_6_cast_fp16)[name = string("x_19_cast_fp16")]; tensor var_263_to_fp16 = const()[name = string("op_263_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60766784)))]; tensor var_264_to_fp16 = const()[name = string("op_264_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62864000)))]; tensor linear_7_cast_fp16 = linear(bias = var_264_to_fp16, weight = var_263_to_fp16, x = x_19_cast_fp16)[name = string("linear_7_cast_fp16")]; tensor x_21_cast_fp16 = add(x = x_15_cast_fp16, y = linear_7_cast_fp16)[name = string("x_21_cast_fp16")]; tensor k_cache_5_begin_0 = const()[name = string("k_cache_5_begin_0"), val = tensor([1, 0, 0, 0])]; tensor k_cache_5_end_0 = const()[name = string("k_cache_5_end_0"), val = tensor([2, 1, 448, 512])]; tensor k_cache_5_end_mask_0 = const()[name = string("k_cache_5_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_5_squeeze_mask_0 = const()[name = string("k_cache_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_5_cast_fp16 = slice_by_index(begin = k_cache_5_begin_0, end = k_cache_5_end_0, end_mask = k_cache_5_end_mask_0, squeeze_mask = k_cache_5_squeeze_mask_0, x = coreml_update_state_12)[name = string("k_cache_5_cast_fp16")]; tensor v_cache_5_begin_0 = const()[name = string("v_cache_5_begin_0"), val = tensor([1, 0, 0, 0])]; tensor v_cache_5_end_0 = const()[name = string("v_cache_5_end_0"), val = tensor([2, 1, 448, 512])]; tensor v_cache_5_end_mask_0 = const()[name = string("v_cache_5_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_5_squeeze_mask_0 = const()[name = string("v_cache_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_5_cast_fp16 = slice_by_index(begin = v_cache_5_begin_0, end = v_cache_5_end_0, end_mask = v_cache_5_end_mask_0, squeeze_mask = v_cache_5_squeeze_mask_0, x = coreml_update_state_13)[name = string("v_cache_5_cast_fp16")]; tensor k_cache_7_begin_0 = const()[name = string("k_cache_7_begin_0"), val = tensor([1, 0, 0, 0])]; tensor k_cache_7_end_0 = const()[name = string("k_cache_7_end_0"), val = tensor([2, 1, 1500, 512])]; tensor k_cache_7_end_mask_0 = const()[name = string("k_cache_7_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_7_squeeze_mask_0 = const()[name = string("k_cache_7_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_7_cast_fp16 = slice_by_index(begin = k_cache_7_begin_0, end = k_cache_7_end_0, end_mask = k_cache_7_end_mask_0, squeeze_mask = k_cache_7_squeeze_mask_0, x = read_state_2)[name = string("k_cache_7_cast_fp16")]; tensor v_cache_7_begin_0 = const()[name = string("v_cache_7_begin_0"), val = tensor([1, 0, 0, 0])]; tensor v_cache_7_end_0 = const()[name = string("v_cache_7_end_0"), val = tensor([2, 1, 1500, 512])]; tensor v_cache_7_end_mask_0 = const()[name = string("v_cache_7_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_7_squeeze_mask_0 = const()[name = string("v_cache_7_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_7_cast_fp16 = slice_by_index(begin = v_cache_7_begin_0, end = v_cache_7_end_0, end_mask = v_cache_7_end_mask_0, squeeze_mask = v_cache_7_squeeze_mask_0, x = read_state_3)[name = string("v_cache_7_cast_fp16")]; int32 var_287 = const()[name = string("op_287"), val = int32(-1)]; tensor var_305_axes_0 = const()[name = string("op_305_axes_0"), val = tensor([-1])]; tensor blocks_1_attn_ln_weight_to_fp16 = const()[name = string("blocks_1_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62865088)))]; tensor blocks_1_attn_ln_bias_to_fp16 = const()[name = string("blocks_1_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62866176)))]; fp16 var_293_to_fp16 = const()[name = string("op_293_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_305_cast_fp16 = layer_norm(axes = var_305_axes_0, beta = blocks_1_attn_ln_bias_to_fp16, epsilon = var_293_to_fp16, gamma = blocks_1_attn_ln_weight_to_fp16, x = x_21_cast_fp16)[name = string("op_305_cast_fp16")]; tensor var_316_to_fp16 = const()[name = string("op_316_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62867264)))]; tensor var_317_to_fp16 = const()[name = string("op_317_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63391616)))]; tensor linear_8_cast_fp16 = linear(bias = var_317_to_fp16, weight = var_316_to_fp16, x = var_305_cast_fp16)[name = string("linear_8_cast_fp16")]; tensor var_320_to_fp16 = const()[name = string("op_320_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63392704)))]; tensor linear_9_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_320_to_fp16, x = var_305_cast_fp16)[name = string("linear_9_cast_fp16")]; tensor var_324_to_fp16 = const()[name = string("op_324_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63917056)))]; tensor var_325_to_fp16 = const()[name = string("op_325_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64441408)))]; tensor linear_10_cast_fp16 = linear(bias = var_325_to_fp16, weight = var_324_to_fp16, x = var_305_cast_fp16)[name = string("linear_10_cast_fp16")]; tensor var_327_shape_cast_fp16 = shape(x = linear_8_cast_fp16)[name = string("op_327_shape_cast_fp16")]; int32 gather_14_axis_0 = const()[name = string("gather_14_axis_0"), val = int32(0)]; int32 gather_14_batch_dims_0 = const()[name = string("gather_14_batch_dims_0"), val = int32(0)]; bool gather_14_validate_indices_0 = const()[name = string("gather_14_validate_indices_0"), val = bool(false)]; string var_327_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_327_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_14_to_uint16 = const()[name = string("select_14_to_uint16"), val = uint16(1)]; tensor var_327_shape_cast_fp16_to_uint16 = cast(dtype = var_327_shape_cast_fp16_to_uint16_dtype_0, x = var_327_shape_cast_fp16)[name = string("cast_76")]; uint16 gather_14_cast_uint16 = gather(axis = gather_14_axis_0, batch_dims = gather_14_batch_dims_0, indices = select_14_to_uint16, validate_indices = gather_14_validate_indices_0, x = var_327_shape_cast_fp16_to_uint16)[name = string("gather_14_cast_uint16")]; string gather_14_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_14_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_14_cast_uint16_to_int32 = cast(dtype = gather_14_cast_uint16_to_int32_dtype_0, x = gather_14_cast_uint16)[name = string("cast_75")]; int32 end_step_5 = add(x = offset, y = gather_14_cast_uint16_to_int32)[name = string("end_step_5")]; tensor expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor([0])]; tensor expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor([0])]; tensor expand_dims_19_axes_0 = const()[name = string("expand_dims_19_axes_0"), val = tensor([0])]; tensor expand_dims_19 = expand_dims(axes = expand_dims_19_axes_0, x = end_step_5)[name = string("expand_dims_19")]; tensor concat_26_values0_0 = const()[name = string("concat_26_values0_0"), val = tensor([1])]; int32 concat_26_axis_0 = const()[name = string("concat_26_axis_0"), val = int32(0)]; bool concat_26_interleave_0 = const()[name = string("concat_26_interleave_0"), val = bool(false)]; tensor concat_26 = concat(axis = concat_26_axis_0, interleave = concat_26_interleave_0, values = (concat_26_values0_0, expand_dims_16, expand_dims_1, expand_dims_18))[name = string("concat_26")]; tensor concat_27_values0_0 = const()[name = string("concat_27_values0_0"), val = tensor([0])]; tensor concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor([0])]; tensor concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor([0])]; int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)]; bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)]; tensor concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (concat_27_values0_0, concat_27_values1_0, expand_dims_19, concat_27_values3_0))[name = string("concat_27")]; tensor k_cache1_internal_tensor_assign_2_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_2_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_2_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_26, begin_mask = k_cache1_internal_tensor_assign_2_begin_mask_0, end = concat_27, end_mask = k_cache1_internal_tensor_assign_2_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_2_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_2_stride_0, update = linear_9_cast_fp16, x = coreml_update_state_12)[name = string("k_cache1_internal_tensor_assign_2_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_2_cast_fp16, input = k_cache1)[name = string("coreml_update_state_14_write_state")]; tensor coreml_update_state_14 = read_state(input = k_cache1)[name = string("coreml_update_state_14")]; tensor v_cache1_internal_tensor_assign_2_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_2_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_2_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_26, begin_mask = v_cache1_internal_tensor_assign_2_begin_mask_0, end = concat_27, end_mask = v_cache1_internal_tensor_assign_2_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_2_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_2_stride_0, update = linear_10_cast_fp16, x = coreml_update_state_13)[name = string("v_cache1_internal_tensor_assign_2_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_2_cast_fp16, input = v_cache1)[name = string("coreml_update_state_15_write_state")]; tensor coreml_update_state_15 = read_state(input = v_cache1)[name = string("coreml_update_state_15")]; int32 concat_32_values0_0 = const()[name = string("concat_32_values0_0"), val = int32(1)]; int32 concat_32_values2_0 = const()[name = string("concat_32_values2_0"), val = int32(512)]; int32 concat_32_axis_0 = const()[name = string("concat_32_axis_0"), val = int32(0)]; bool concat_32_interleave_0 = const()[name = string("concat_32_interleave_0"), val = bool(false)]; tensor concat_32 = concat(axis = concat_32_axis_0, interleave = concat_32_interleave_0, values = (concat_32_values0_0, end_step_5, concat_32_values2_0))[name = string("concat_32")]; tensor var_343_begin_0 = const()[name = string("op_343_begin_0"), val = tensor([0, 0, 0])]; tensor var_343_end_mask_0 = const()[name = string("op_343_end_mask_0"), val = tensor([true, false, true])]; tensor var_343_cast_fp16 = slice_by_index(begin = var_343_begin_0, end = concat_32, end_mask = var_343_end_mask_0, x = k_cache_5_cast_fp16)[name = string("op_343_cast_fp16")]; tensor var_346_begin_0 = const()[name = string("op_346_begin_0"), val = tensor([0, 0, 0])]; tensor var_346_end_mask_0 = const()[name = string("op_346_end_mask_0"), val = tensor([true, false, true])]; tensor var_346_cast_fp16 = slice_by_index(begin = var_346_begin_0, end = concat_32, end_mask = var_346_end_mask_0, x = v_cache_5_cast_fp16)[name = string("op_346_cast_fp16")]; tensor concat_34x = const()[name = string("concat_34x"), val = tensor([1, -1, 8, 64])]; tensor var_356_cast_fp16 = reshape(shape = concat_34x, x = linear_8_cast_fp16)[name = string("op_356_cast_fp16")]; tensor const_34_to_fp16 = const()[name = string("const_34_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_11_cast_fp16 = mul(x = var_356_cast_fp16, y = const_34_to_fp16)[name = string("q_11_cast_fp16")]; tensor concat_35x = const()[name = string("concat_35x"), val = tensor([1, -1, 8, 64])]; tensor var_363_cast_fp16 = reshape(shape = concat_35x, x = var_343_cast_fp16)[name = string("op_363_cast_fp16")]; tensor const_35_to_fp16 = const()[name = string("const_35_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_15_cast_fp16 = mul(x = var_363_cast_fp16, y = const_35_to_fp16)[name = string("k_15_cast_fp16")]; tensor concat_36x = const()[name = string("concat_36x"), val = tensor([1, -1, 8, 64])]; tensor var_370_cast_fp16 = reshape(shape = concat_36x, x = var_346_cast_fp16)[name = string("op_370_cast_fp16")]; tensor var_371 = const()[name = string("op_371"), val = tensor([0, 2, 1, 3])]; bool qk_7_transpose_x_0 = const()[name = string("qk_7_transpose_x_0"), val = bool(false)]; bool qk_7_transpose_y_0 = const()[name = string("qk_7_transpose_y_0"), val = bool(false)]; tensor transpose_53_perm_0 = const()[name = string("transpose_53_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_54_perm_0 = const()[name = string("transpose_54_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_54 = transpose(perm = transpose_54_perm_0, x = k_15_cast_fp16)[name = string("transpose_110")]; tensor transpose_53 = transpose(perm = transpose_53_perm_0, x = q_11_cast_fp16)[name = string("transpose_111")]; tensor qk_7_cast_fp16 = matmul(transpose_x = qk_7_transpose_x_0, transpose_y = qk_7_transpose_y_0, x = transpose_53, y = transpose_54)[name = string("qk_7_cast_fp16")]; int32 concat_37_values1_0 = const()[name = string("concat_37_values1_0"), val = int32(448)]; int32 concat_37_axis_0 = const()[name = string("concat_37_axis_0"), val = int32(0)]; bool concat_37_interleave_0 = const()[name = string("concat_37_interleave_0"), val = bool(false)]; tensor concat_37 = concat(axis = concat_37_axis_0, interleave = concat_37_interleave_0, values = (gather_14_cast_uint16_to_int32, concat_37_values1_0))[name = string("concat_37")]; tensor var_374_begin_0 = const()[name = string("op_374_begin_0"), val = tensor([0, 0])]; tensor var_374_end_mask_0 = const()[name = string("op_374_end_mask_0"), val = tensor([false, true])]; tensor var_374_cast_fp16 = slice_by_index(begin = var_374_begin_0, end = concat_37, end_mask = var_374_end_mask_0, x = mask_to_fp16)[name = string("op_374_cast_fp16")]; int32 concat_38_values0_0 = const()[name = string("concat_38_values0_0"), val = int32(0)]; int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)]; bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)]; tensor concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (concat_38_values0_0, gather_14_cast_uint16_to_int32))[name = string("concat_38")]; tensor var_375_begin_0 = const()[name = string("op_375_begin_0"), val = tensor([0, 0])]; tensor var_375_end_mask_0 = const()[name = string("op_375_end_mask_0"), val = tensor([true, false])]; tensor var_375_cast_fp16 = slice_by_index(begin = var_375_begin_0, end = concat_38, end_mask = var_375_end_mask_0, x = var_374_cast_fp16)[name = string("op_375_cast_fp16")]; tensor qk_9_cast_fp16 = add(x = qk_7_cast_fp16, y = var_375_cast_fp16)[name = string("qk_9_cast_fp16")]; tensor var_378_cast_fp16 = softmax(axis = var_287, x = qk_9_cast_fp16)[name = string("op_378_cast_fp16")]; bool var_380_transpose_x_0 = const()[name = string("op_380_transpose_x_0"), val = bool(false)]; bool var_380_transpose_y_0 = const()[name = string("op_380_transpose_y_0"), val = bool(false)]; tensor v_15_cast_fp16 = transpose(perm = var_371, x = var_370_cast_fp16)[name = string("transpose_112")]; tensor var_380_cast_fp16 = matmul(transpose_x = var_380_transpose_x_0, transpose_y = var_380_transpose_y_0, x = var_378_cast_fp16, y = v_15_cast_fp16)[name = string("op_380_cast_fp16")]; tensor var_381 = const()[name = string("op_381"), val = tensor([0, 2, 1, 3])]; tensor concat_39x = const()[name = string("concat_39x"), val = tensor([1, -1, 512])]; tensor var_382_cast_fp16 = transpose(perm = var_381, x = var_380_cast_fp16)[name = string("transpose_109")]; tensor x_25_cast_fp16 = reshape(shape = concat_39x, x = var_382_cast_fp16)[name = string("x_25_cast_fp16")]; tensor var_386_to_fp16 = const()[name = string("op_386_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64442496)))]; tensor var_387_to_fp16 = const()[name = string("op_387_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64966848)))]; tensor linear_11_cast_fp16 = linear(bias = var_387_to_fp16, weight = var_386_to_fp16, x = x_25_cast_fp16)[name = string("linear_11_cast_fp16")]; tensor x_27_cast_fp16 = add(x = x_21_cast_fp16, y = linear_11_cast_fp16)[name = string("x_27_cast_fp16")]; tensor var_394_axes_0 = const()[name = string("op_394_axes_0"), val = tensor([-1])]; tensor blocks_1_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_1_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64967936)))]; tensor blocks_1_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_1_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64969024)))]; tensor var_394_cast_fp16 = layer_norm(axes = var_394_axes_0, beta = blocks_1_cross_attn_ln_bias_to_fp16, epsilon = var_293_to_fp16, gamma = blocks_1_cross_attn_ln_weight_to_fp16, x = x_27_cast_fp16)[name = string("op_394_cast_fp16")]; tensor var_403_to_fp16 = const()[name = string("op_403_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64970112)))]; tensor var_404_to_fp16 = const()[name = string("op_404_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65494464)))]; tensor linear_12_cast_fp16 = linear(bias = var_404_to_fp16, weight = var_403_to_fp16, x = var_394_cast_fp16)[name = string("linear_12_cast_fp16")]; tensor concat_40 = const()[name = string("concat_40"), val = tensor([0, 0, 0])]; tensor concat_41 = const()[name = string("concat_41"), val = tensor([0, 1500, 0])]; tensor k_17_internal_tensor_assign_1_stride_0 = const()[name = string("k_17_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_17_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_17_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_17_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_17_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_40, begin_mask = k_17_internal_tensor_assign_1_begin_mask_0, end = concat_41, end_mask = k_17_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_17_internal_tensor_assign_1_squeeze_mask_0, stride = k_17_internal_tensor_assign_1_stride_0, update = k_cache_7_cast_fp16, x = k_7_to_fp16)[name = string("k_17_internal_tensor_assign_1_cast_fp16")]; tensor concat_42 = const()[name = string("concat_42"), val = tensor([0, 0, 0])]; tensor concat_43 = const()[name = string("concat_43"), val = tensor([0, 1500, 0])]; tensor v_17_internal_tensor_assign_1_stride_0 = const()[name = string("v_17_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_17_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_17_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_17_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_17_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_42, begin_mask = v_17_internal_tensor_assign_1_begin_mask_0, end = concat_43, end_mask = v_17_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_17_internal_tensor_assign_1_squeeze_mask_0, stride = v_17_internal_tensor_assign_1_stride_0, update = v_cache_7_cast_fp16, x = k_7_to_fp16)[name = string("v_17_internal_tensor_assign_1_cast_fp16")]; tensor concat_44x = const()[name = string("concat_44x"), val = tensor([1, -1, 8, 64])]; tensor var_424_cast_fp16 = reshape(shape = concat_44x, x = linear_12_cast_fp16)[name = string("op_424_cast_fp16")]; tensor const_36_to_fp16 = const()[name = string("const_36_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_15_cast_fp16 = mul(x = var_424_cast_fp16, y = const_36_to_fp16)[name = string("q_15_cast_fp16")]; tensor var_430 = const()[name = string("op_430"), val = tensor([1, 1500, 8, -1])]; tensor var_431_cast_fp16 = reshape(shape = var_430, x = k_17_internal_tensor_assign_1_cast_fp16)[name = string("op_431_cast_fp16")]; tensor const_37_to_fp16 = const()[name = string("const_37_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_19_cast_fp16 = mul(x = var_431_cast_fp16, y = const_37_to_fp16)[name = string("k_19_cast_fp16")]; tensor var_437 = const()[name = string("op_437"), val = tensor([1, 1500, 8, -1])]; tensor var_438_cast_fp16 = reshape(shape = var_437, x = v_17_internal_tensor_assign_1_cast_fp16)[name = string("op_438_cast_fp16")]; tensor var_439 = const()[name = string("op_439"), val = tensor([0, 2, 1, 3])]; bool qk_11_transpose_x_0 = const()[name = string("qk_11_transpose_x_0"), val = bool(false)]; bool qk_11_transpose_y_0 = const()[name = string("qk_11_transpose_y_0"), val = bool(false)]; tensor transpose_55_perm_0 = const()[name = string("transpose_55_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_56_perm_0 = const()[name = string("transpose_56_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_56 = transpose(perm = transpose_56_perm_0, x = k_19_cast_fp16)[name = string("transpose_106")]; tensor transpose_55 = transpose(perm = transpose_55_perm_0, x = q_15_cast_fp16)[name = string("transpose_107")]; tensor qk_11_cast_fp16 = matmul(transpose_x = qk_11_transpose_x_0, transpose_y = qk_11_transpose_y_0, x = transpose_55, y = transpose_56)[name = string("qk_11_cast_fp16")]; tensor var_443_cast_fp16 = softmax(axis = var_287, x = qk_11_cast_fp16)[name = string("op_443_cast_fp16")]; bool var_445_transpose_x_0 = const()[name = string("op_445_transpose_x_0"), val = bool(false)]; bool var_445_transpose_y_0 = const()[name = string("op_445_transpose_y_0"), val = bool(false)]; tensor v_19_cast_fp16 = transpose(perm = var_439, x = var_438_cast_fp16)[name = string("transpose_108")]; tensor var_445_cast_fp16 = matmul(transpose_x = var_445_transpose_x_0, transpose_y = var_445_transpose_y_0, x = var_443_cast_fp16, y = v_19_cast_fp16)[name = string("op_445_cast_fp16")]; tensor var_446 = const()[name = string("op_446"), val = tensor([0, 2, 1, 3])]; tensor concat_45x = const()[name = string("concat_45x"), val = tensor([1, -1, 512])]; tensor var_447_cast_fp16 = transpose(perm = var_446, x = var_445_cast_fp16)[name = string("transpose_105")]; tensor x_31_cast_fp16 = reshape(shape = concat_45x, x = var_447_cast_fp16)[name = string("x_31_cast_fp16")]; tensor var_451_to_fp16 = const()[name = string("op_451_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65495552)))]; tensor var_452_to_fp16 = const()[name = string("op_452_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66019904)))]; tensor linear_13_cast_fp16 = linear(bias = var_452_to_fp16, weight = var_451_to_fp16, x = x_31_cast_fp16)[name = string("linear_13_cast_fp16")]; tensor x_33_cast_fp16 = add(x = x_27_cast_fp16, y = linear_13_cast_fp16)[name = string("x_33_cast_fp16")]; tensor var_459_axes_0 = const()[name = string("op_459_axes_0"), val = tensor([-1])]; tensor blocks_1_mlp_ln_weight_to_fp16 = const()[name = string("blocks_1_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66020992)))]; tensor blocks_1_mlp_ln_bias_to_fp16 = const()[name = string("blocks_1_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66022080)))]; tensor var_459_cast_fp16 = layer_norm(axes = var_459_axes_0, beta = blocks_1_mlp_ln_bias_to_fp16, epsilon = var_293_to_fp16, gamma = blocks_1_mlp_ln_weight_to_fp16, x = x_33_cast_fp16)[name = string("op_459_cast_fp16")]; tensor var_468_to_fp16 = const()[name = string("op_468_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66023168)))]; tensor var_469_to_fp16 = const()[name = string("op_469_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68120384)))]; tensor linear_14_cast_fp16 = linear(bias = var_469_to_fp16, weight = var_468_to_fp16, x = var_459_cast_fp16)[name = string("linear_14_cast_fp16")]; string x_37_mode_0 = const()[name = string("x_37_mode_0"), val = string("EXACT")]; tensor x_37_cast_fp16 = gelu(mode = x_37_mode_0, x = linear_14_cast_fp16)[name = string("x_37_cast_fp16")]; tensor var_474_to_fp16 = const()[name = string("op_474_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68124544)))]; tensor var_475_to_fp16 = const()[name = string("op_475_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70221760)))]; tensor linear_15_cast_fp16 = linear(bias = var_475_to_fp16, weight = var_474_to_fp16, x = x_37_cast_fp16)[name = string("linear_15_cast_fp16")]; tensor x_39_cast_fp16 = add(x = x_33_cast_fp16, y = linear_15_cast_fp16)[name = string("x_39_cast_fp16")]; tensor k_cache_9_begin_0 = const()[name = string("k_cache_9_begin_0"), val = tensor([2, 0, 0, 0])]; tensor k_cache_9_end_0 = const()[name = string("k_cache_9_end_0"), val = tensor([3, 1, 448, 512])]; tensor k_cache_9_end_mask_0 = const()[name = string("k_cache_9_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_9_squeeze_mask_0 = const()[name = string("k_cache_9_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_9_cast_fp16 = slice_by_index(begin = k_cache_9_begin_0, end = k_cache_9_end_0, end_mask = k_cache_9_end_mask_0, squeeze_mask = k_cache_9_squeeze_mask_0, x = coreml_update_state_14)[name = string("k_cache_9_cast_fp16")]; tensor v_cache_9_begin_0 = const()[name = string("v_cache_9_begin_0"), val = tensor([2, 0, 0, 0])]; tensor v_cache_9_end_0 = const()[name = string("v_cache_9_end_0"), val = tensor([3, 1, 448, 512])]; tensor v_cache_9_end_mask_0 = const()[name = string("v_cache_9_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_9_squeeze_mask_0 = const()[name = string("v_cache_9_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_9_cast_fp16 = slice_by_index(begin = v_cache_9_begin_0, end = v_cache_9_end_0, end_mask = v_cache_9_end_mask_0, squeeze_mask = v_cache_9_squeeze_mask_0, x = coreml_update_state_15)[name = string("v_cache_9_cast_fp16")]; tensor k_cache_11_begin_0 = const()[name = string("k_cache_11_begin_0"), val = tensor([2, 0, 0, 0])]; tensor k_cache_11_end_0 = const()[name = string("k_cache_11_end_0"), val = tensor([3, 1, 1500, 512])]; tensor k_cache_11_end_mask_0 = const()[name = string("k_cache_11_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_11_squeeze_mask_0 = const()[name = string("k_cache_11_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_11_cast_fp16 = slice_by_index(begin = k_cache_11_begin_0, end = k_cache_11_end_0, end_mask = k_cache_11_end_mask_0, squeeze_mask = k_cache_11_squeeze_mask_0, x = read_state_2)[name = string("k_cache_11_cast_fp16")]; tensor v_cache_11_begin_0 = const()[name = string("v_cache_11_begin_0"), val = tensor([2, 0, 0, 0])]; tensor v_cache_11_end_0 = const()[name = string("v_cache_11_end_0"), val = tensor([3, 1, 1500, 512])]; tensor v_cache_11_end_mask_0 = const()[name = string("v_cache_11_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_11_squeeze_mask_0 = const()[name = string("v_cache_11_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_11_cast_fp16 = slice_by_index(begin = v_cache_11_begin_0, end = v_cache_11_end_0, end_mask = v_cache_11_end_mask_0, squeeze_mask = v_cache_11_squeeze_mask_0, x = read_state_3)[name = string("v_cache_11_cast_fp16")]; int32 var_498 = const()[name = string("op_498"), val = int32(-1)]; tensor var_516_axes_0 = const()[name = string("op_516_axes_0"), val = tensor([-1])]; tensor blocks_2_attn_ln_weight_to_fp16 = const()[name = string("blocks_2_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70222848)))]; tensor blocks_2_attn_ln_bias_to_fp16 = const()[name = string("blocks_2_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70223936)))]; fp16 var_504_to_fp16 = const()[name = string("op_504_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_516_cast_fp16 = layer_norm(axes = var_516_axes_0, beta = blocks_2_attn_ln_bias_to_fp16, epsilon = var_504_to_fp16, gamma = blocks_2_attn_ln_weight_to_fp16, x = x_39_cast_fp16)[name = string("op_516_cast_fp16")]; tensor var_527_to_fp16 = const()[name = string("op_527_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70225024)))]; tensor var_528_to_fp16 = const()[name = string("op_528_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70749376)))]; tensor linear_16_cast_fp16 = linear(bias = var_528_to_fp16, weight = var_527_to_fp16, x = var_516_cast_fp16)[name = string("linear_16_cast_fp16")]; tensor var_531_to_fp16 = const()[name = string("op_531_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70750464)))]; tensor linear_17_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_531_to_fp16, x = var_516_cast_fp16)[name = string("linear_17_cast_fp16")]; tensor var_535_to_fp16 = const()[name = string("op_535_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71274816)))]; tensor var_536_to_fp16 = const()[name = string("op_536_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71799168)))]; tensor linear_18_cast_fp16 = linear(bias = var_536_to_fp16, weight = var_535_to_fp16, x = var_516_cast_fp16)[name = string("linear_18_cast_fp16")]; tensor var_538_shape_cast_fp16 = shape(x = linear_16_cast_fp16)[name = string("op_538_shape_cast_fp16")]; int32 gather_26_axis_0 = const()[name = string("gather_26_axis_0"), val = int32(0)]; int32 gather_26_batch_dims_0 = const()[name = string("gather_26_batch_dims_0"), val = int32(0)]; bool gather_26_validate_indices_0 = const()[name = string("gather_26_validate_indices_0"), val = bool(false)]; string var_538_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_538_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_26_to_uint16 = const()[name = string("select_26_to_uint16"), val = uint16(1)]; tensor var_538_shape_cast_fp16_to_uint16 = cast(dtype = var_538_shape_cast_fp16_to_uint16_dtype_0, x = var_538_shape_cast_fp16)[name = string("cast_74")]; uint16 gather_26_cast_uint16 = gather(axis = gather_26_axis_0, batch_dims = gather_26_batch_dims_0, indices = select_26_to_uint16, validate_indices = gather_26_validate_indices_0, x = var_538_shape_cast_fp16_to_uint16)[name = string("gather_26_cast_uint16")]; string gather_26_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_26_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_26_cast_uint16_to_int32 = cast(dtype = gather_26_cast_uint16_to_int32_dtype_0, x = gather_26_cast_uint16)[name = string("cast_73")]; int32 end_step_7 = add(x = offset, y = gather_26_cast_uint16_to_int32)[name = string("end_step_7")]; tensor expand_dims_32 = const()[name = string("expand_dims_32"), val = tensor([0])]; tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([0])]; tensor expand_dims_35_axes_0 = const()[name = string("expand_dims_35_axes_0"), val = tensor([0])]; tensor expand_dims_35 = expand_dims(axes = expand_dims_35_axes_0, x = end_step_7)[name = string("expand_dims_35")]; tensor concat_48_values0_0 = const()[name = string("concat_48_values0_0"), val = tensor([2])]; int32 concat_48_axis_0 = const()[name = string("concat_48_axis_0"), val = int32(0)]; bool concat_48_interleave_0 = const()[name = string("concat_48_interleave_0"), val = bool(false)]; tensor concat_48 = concat(axis = concat_48_axis_0, interleave = concat_48_interleave_0, values = (concat_48_values0_0, expand_dims_32, expand_dims_1, expand_dims_34))[name = string("concat_48")]; tensor concat_49_values0_0 = const()[name = string("concat_49_values0_0"), val = tensor([0])]; tensor concat_49_values1_0 = const()[name = string("concat_49_values1_0"), val = tensor([0])]; tensor concat_49_values3_0 = const()[name = string("concat_49_values3_0"), val = tensor([0])]; int32 concat_49_axis_0 = const()[name = string("concat_49_axis_0"), val = int32(0)]; bool concat_49_interleave_0 = const()[name = string("concat_49_interleave_0"), val = bool(false)]; tensor concat_49 = concat(axis = concat_49_axis_0, interleave = concat_49_interleave_0, values = (concat_49_values0_0, concat_49_values1_0, expand_dims_35, concat_49_values3_0))[name = string("concat_49")]; tensor k_cache1_internal_tensor_assign_3_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_3_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_3_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_48, begin_mask = k_cache1_internal_tensor_assign_3_begin_mask_0, end = concat_49, end_mask = k_cache1_internal_tensor_assign_3_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_3_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_3_stride_0, update = linear_17_cast_fp16, x = coreml_update_state_14)[name = string("k_cache1_internal_tensor_assign_3_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_3_cast_fp16, input = k_cache1)[name = string("coreml_update_state_16_write_state")]; tensor coreml_update_state_16 = read_state(input = k_cache1)[name = string("coreml_update_state_16")]; tensor v_cache1_internal_tensor_assign_3_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_3_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_3_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_48, begin_mask = v_cache1_internal_tensor_assign_3_begin_mask_0, end = concat_49, end_mask = v_cache1_internal_tensor_assign_3_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_3_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_3_stride_0, update = linear_18_cast_fp16, x = coreml_update_state_15)[name = string("v_cache1_internal_tensor_assign_3_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_3_cast_fp16, input = v_cache1)[name = string("coreml_update_state_17_write_state")]; tensor coreml_update_state_17 = read_state(input = v_cache1)[name = string("coreml_update_state_17")]; int32 concat_54_values0_0 = const()[name = string("concat_54_values0_0"), val = int32(1)]; int32 concat_54_values2_0 = const()[name = string("concat_54_values2_0"), val = int32(512)]; int32 concat_54_axis_0 = const()[name = string("concat_54_axis_0"), val = int32(0)]; bool concat_54_interleave_0 = const()[name = string("concat_54_interleave_0"), val = bool(false)]; tensor concat_54 = concat(axis = concat_54_axis_0, interleave = concat_54_interleave_0, values = (concat_54_values0_0, end_step_7, concat_54_values2_0))[name = string("concat_54")]; tensor var_554_begin_0 = const()[name = string("op_554_begin_0"), val = tensor([0, 0, 0])]; tensor var_554_end_mask_0 = const()[name = string("op_554_end_mask_0"), val = tensor([true, false, true])]; tensor var_554_cast_fp16 = slice_by_index(begin = var_554_begin_0, end = concat_54, end_mask = var_554_end_mask_0, x = k_cache_9_cast_fp16)[name = string("op_554_cast_fp16")]; tensor var_557_begin_0 = const()[name = string("op_557_begin_0"), val = tensor([0, 0, 0])]; tensor var_557_end_mask_0 = const()[name = string("op_557_end_mask_0"), val = tensor([true, false, true])]; tensor var_557_cast_fp16 = slice_by_index(begin = var_557_begin_0, end = concat_54, end_mask = var_557_end_mask_0, x = v_cache_9_cast_fp16)[name = string("op_557_cast_fp16")]; tensor concat_56x = const()[name = string("concat_56x"), val = tensor([1, -1, 8, 64])]; tensor var_567_cast_fp16 = reshape(shape = concat_56x, x = linear_16_cast_fp16)[name = string("op_567_cast_fp16")]; tensor const_38_to_fp16 = const()[name = string("const_38_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_19_cast_fp16 = mul(x = var_567_cast_fp16, y = const_38_to_fp16)[name = string("q_19_cast_fp16")]; tensor concat_57x = const()[name = string("concat_57x"), val = tensor([1, -1, 8, 64])]; tensor var_574_cast_fp16 = reshape(shape = concat_57x, x = var_554_cast_fp16)[name = string("op_574_cast_fp16")]; tensor const_39_to_fp16 = const()[name = string("const_39_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_25_cast_fp16 = mul(x = var_574_cast_fp16, y = const_39_to_fp16)[name = string("k_25_cast_fp16")]; tensor concat_58x = const()[name = string("concat_58x"), val = tensor([1, -1, 8, 64])]; tensor var_581_cast_fp16 = reshape(shape = concat_58x, x = var_557_cast_fp16)[name = string("op_581_cast_fp16")]; tensor var_582 = const()[name = string("op_582"), val = tensor([0, 2, 1, 3])]; bool qk_13_transpose_x_0 = const()[name = string("qk_13_transpose_x_0"), val = bool(false)]; bool qk_13_transpose_y_0 = const()[name = string("qk_13_transpose_y_0"), val = bool(false)]; tensor transpose_57_perm_0 = const()[name = string("transpose_57_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_58_perm_0 = const()[name = string("transpose_58_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_58 = transpose(perm = transpose_58_perm_0, x = k_25_cast_fp16)[name = string("transpose_102")]; tensor transpose_57 = transpose(perm = transpose_57_perm_0, x = q_19_cast_fp16)[name = string("transpose_103")]; tensor qk_13_cast_fp16 = matmul(transpose_x = qk_13_transpose_x_0, transpose_y = qk_13_transpose_y_0, x = transpose_57, y = transpose_58)[name = string("qk_13_cast_fp16")]; int32 concat_59_values1_0 = const()[name = string("concat_59_values1_0"), val = int32(448)]; int32 concat_59_axis_0 = const()[name = string("concat_59_axis_0"), val = int32(0)]; bool concat_59_interleave_0 = const()[name = string("concat_59_interleave_0"), val = bool(false)]; tensor concat_59 = concat(axis = concat_59_axis_0, interleave = concat_59_interleave_0, values = (gather_26_cast_uint16_to_int32, concat_59_values1_0))[name = string("concat_59")]; tensor var_585_begin_0 = const()[name = string("op_585_begin_0"), val = tensor([0, 0])]; tensor var_585_end_mask_0 = const()[name = string("op_585_end_mask_0"), val = tensor([false, true])]; tensor var_585_cast_fp16 = slice_by_index(begin = var_585_begin_0, end = concat_59, end_mask = var_585_end_mask_0, x = mask_to_fp16)[name = string("op_585_cast_fp16")]; int32 concat_60_values0_0 = const()[name = string("concat_60_values0_0"), val = int32(0)]; int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)]; bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)]; tensor concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (concat_60_values0_0, gather_26_cast_uint16_to_int32))[name = string("concat_60")]; tensor var_586_begin_0 = const()[name = string("op_586_begin_0"), val = tensor([0, 0])]; tensor var_586_end_mask_0 = const()[name = string("op_586_end_mask_0"), val = tensor([true, false])]; tensor var_586_cast_fp16 = slice_by_index(begin = var_586_begin_0, end = concat_60, end_mask = var_586_end_mask_0, x = var_585_cast_fp16)[name = string("op_586_cast_fp16")]; tensor qk_15_cast_fp16 = add(x = qk_13_cast_fp16, y = var_586_cast_fp16)[name = string("qk_15_cast_fp16")]; tensor var_589_cast_fp16 = softmax(axis = var_498, x = qk_15_cast_fp16)[name = string("op_589_cast_fp16")]; bool var_591_transpose_x_0 = const()[name = string("op_591_transpose_x_0"), val = bool(false)]; bool var_591_transpose_y_0 = const()[name = string("op_591_transpose_y_0"), val = bool(false)]; tensor v_25_cast_fp16 = transpose(perm = var_582, x = var_581_cast_fp16)[name = string("transpose_104")]; tensor var_591_cast_fp16 = matmul(transpose_x = var_591_transpose_x_0, transpose_y = var_591_transpose_y_0, x = var_589_cast_fp16, y = v_25_cast_fp16)[name = string("op_591_cast_fp16")]; tensor var_592 = const()[name = string("op_592"), val = tensor([0, 2, 1, 3])]; tensor concat_61x = const()[name = string("concat_61x"), val = tensor([1, -1, 512])]; tensor var_593_cast_fp16 = transpose(perm = var_592, x = var_591_cast_fp16)[name = string("transpose_101")]; tensor x_43_cast_fp16 = reshape(shape = concat_61x, x = var_593_cast_fp16)[name = string("x_43_cast_fp16")]; tensor var_597_to_fp16 = const()[name = string("op_597_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71800256)))]; tensor var_598_to_fp16 = const()[name = string("op_598_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72324608)))]; tensor linear_19_cast_fp16 = linear(bias = var_598_to_fp16, weight = var_597_to_fp16, x = x_43_cast_fp16)[name = string("linear_19_cast_fp16")]; tensor x_45_cast_fp16 = add(x = x_39_cast_fp16, y = linear_19_cast_fp16)[name = string("x_45_cast_fp16")]; tensor var_605_axes_0 = const()[name = string("op_605_axes_0"), val = tensor([-1])]; tensor blocks_2_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_2_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72325696)))]; tensor blocks_2_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_2_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72326784)))]; tensor var_605_cast_fp16 = layer_norm(axes = var_605_axes_0, beta = blocks_2_cross_attn_ln_bias_to_fp16, epsilon = var_504_to_fp16, gamma = blocks_2_cross_attn_ln_weight_to_fp16, x = x_45_cast_fp16)[name = string("op_605_cast_fp16")]; tensor var_614_to_fp16 = const()[name = string("op_614_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72327872)))]; tensor var_615_to_fp16 = const()[name = string("op_615_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72852224)))]; tensor linear_20_cast_fp16 = linear(bias = var_615_to_fp16, weight = var_614_to_fp16, x = var_605_cast_fp16)[name = string("linear_20_cast_fp16")]; tensor concat_62 = const()[name = string("concat_62"), val = tensor([0, 0, 0])]; tensor concat_63 = const()[name = string("concat_63"), val = tensor([0, 1500, 0])]; tensor k_27_internal_tensor_assign_1_stride_0 = const()[name = string("k_27_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_27_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_27_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_27_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_27_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_62, begin_mask = k_27_internal_tensor_assign_1_begin_mask_0, end = concat_63, end_mask = k_27_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_27_internal_tensor_assign_1_squeeze_mask_0, stride = k_27_internal_tensor_assign_1_stride_0, update = k_cache_11_cast_fp16, x = k_7_to_fp16)[name = string("k_27_internal_tensor_assign_1_cast_fp16")]; tensor concat_64 = const()[name = string("concat_64"), val = tensor([0, 0, 0])]; tensor concat_65 = const()[name = string("concat_65"), val = tensor([0, 1500, 0])]; tensor v_27_internal_tensor_assign_1_stride_0 = const()[name = string("v_27_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_27_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_27_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_27_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_27_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_64, begin_mask = v_27_internal_tensor_assign_1_begin_mask_0, end = concat_65, end_mask = v_27_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_27_internal_tensor_assign_1_squeeze_mask_0, stride = v_27_internal_tensor_assign_1_stride_0, update = v_cache_11_cast_fp16, x = k_7_to_fp16)[name = string("v_27_internal_tensor_assign_1_cast_fp16")]; tensor concat_66x = const()[name = string("concat_66x"), val = tensor([1, -1, 8, 64])]; tensor var_635_cast_fp16 = reshape(shape = concat_66x, x = linear_20_cast_fp16)[name = string("op_635_cast_fp16")]; tensor const_40_to_fp16 = const()[name = string("const_40_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_23_cast_fp16 = mul(x = var_635_cast_fp16, y = const_40_to_fp16)[name = string("q_23_cast_fp16")]; tensor var_641 = const()[name = string("op_641"), val = tensor([1, 1500, 8, -1])]; tensor var_642_cast_fp16 = reshape(shape = var_641, x = k_27_internal_tensor_assign_1_cast_fp16)[name = string("op_642_cast_fp16")]; tensor const_41_to_fp16 = const()[name = string("const_41_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_29_cast_fp16 = mul(x = var_642_cast_fp16, y = const_41_to_fp16)[name = string("k_29_cast_fp16")]; tensor var_648 = const()[name = string("op_648"), val = tensor([1, 1500, 8, -1])]; tensor var_649_cast_fp16 = reshape(shape = var_648, x = v_27_internal_tensor_assign_1_cast_fp16)[name = string("op_649_cast_fp16")]; tensor var_650 = const()[name = string("op_650"), val = tensor([0, 2, 1, 3])]; bool qk_17_transpose_x_0 = const()[name = string("qk_17_transpose_x_0"), val = bool(false)]; bool qk_17_transpose_y_0 = const()[name = string("qk_17_transpose_y_0"), val = bool(false)]; tensor transpose_59_perm_0 = const()[name = string("transpose_59_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_60_perm_0 = const()[name = string("transpose_60_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_60 = transpose(perm = transpose_60_perm_0, x = k_29_cast_fp16)[name = string("transpose_98")]; tensor transpose_59 = transpose(perm = transpose_59_perm_0, x = q_23_cast_fp16)[name = string("transpose_99")]; tensor qk_17_cast_fp16 = matmul(transpose_x = qk_17_transpose_x_0, transpose_y = qk_17_transpose_y_0, x = transpose_59, y = transpose_60)[name = string("qk_17_cast_fp16")]; tensor var_654_cast_fp16 = softmax(axis = var_498, x = qk_17_cast_fp16)[name = string("op_654_cast_fp16")]; bool var_656_transpose_x_0 = const()[name = string("op_656_transpose_x_0"), val = bool(false)]; bool var_656_transpose_y_0 = const()[name = string("op_656_transpose_y_0"), val = bool(false)]; tensor v_29_cast_fp16 = transpose(perm = var_650, x = var_649_cast_fp16)[name = string("transpose_100")]; tensor var_656_cast_fp16 = matmul(transpose_x = var_656_transpose_x_0, transpose_y = var_656_transpose_y_0, x = var_654_cast_fp16, y = v_29_cast_fp16)[name = string("op_656_cast_fp16")]; tensor var_657 = const()[name = string("op_657"), val = tensor([0, 2, 1, 3])]; tensor concat_67x = const()[name = string("concat_67x"), val = tensor([1, -1, 512])]; tensor var_658_cast_fp16 = transpose(perm = var_657, x = var_656_cast_fp16)[name = string("transpose_97")]; tensor x_49_cast_fp16 = reshape(shape = concat_67x, x = var_658_cast_fp16)[name = string("x_49_cast_fp16")]; tensor var_662_to_fp16 = const()[name = string("op_662_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72853312)))]; tensor var_663_to_fp16 = const()[name = string("op_663_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73377664)))]; tensor linear_21_cast_fp16 = linear(bias = var_663_to_fp16, weight = var_662_to_fp16, x = x_49_cast_fp16)[name = string("linear_21_cast_fp16")]; tensor x_51_cast_fp16 = add(x = x_45_cast_fp16, y = linear_21_cast_fp16)[name = string("x_51_cast_fp16")]; tensor var_670_axes_0 = const()[name = string("op_670_axes_0"), val = tensor([-1])]; tensor blocks_2_mlp_ln_weight_to_fp16 = const()[name = string("blocks_2_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73378752)))]; tensor blocks_2_mlp_ln_bias_to_fp16 = const()[name = string("blocks_2_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73379840)))]; tensor var_670_cast_fp16 = layer_norm(axes = var_670_axes_0, beta = blocks_2_mlp_ln_bias_to_fp16, epsilon = var_504_to_fp16, gamma = blocks_2_mlp_ln_weight_to_fp16, x = x_51_cast_fp16)[name = string("op_670_cast_fp16")]; tensor var_679_to_fp16 = const()[name = string("op_679_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73380928)))]; tensor var_680_to_fp16 = const()[name = string("op_680_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75478144)))]; tensor linear_22_cast_fp16 = linear(bias = var_680_to_fp16, weight = var_679_to_fp16, x = var_670_cast_fp16)[name = string("linear_22_cast_fp16")]; string x_55_mode_0 = const()[name = string("x_55_mode_0"), val = string("EXACT")]; tensor x_55_cast_fp16 = gelu(mode = x_55_mode_0, x = linear_22_cast_fp16)[name = string("x_55_cast_fp16")]; tensor var_685_to_fp16 = const()[name = string("op_685_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75482304)))]; tensor var_686_to_fp16 = const()[name = string("op_686_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77579520)))]; tensor linear_23_cast_fp16 = linear(bias = var_686_to_fp16, weight = var_685_to_fp16, x = x_55_cast_fp16)[name = string("linear_23_cast_fp16")]; tensor x_57_cast_fp16 = add(x = x_51_cast_fp16, y = linear_23_cast_fp16)[name = string("x_57_cast_fp16")]; tensor k_cache_13_begin_0 = const()[name = string("k_cache_13_begin_0"), val = tensor([3, 0, 0, 0])]; tensor k_cache_13_end_0 = const()[name = string("k_cache_13_end_0"), val = tensor([4, 1, 448, 512])]; tensor k_cache_13_end_mask_0 = const()[name = string("k_cache_13_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_13_squeeze_mask_0 = const()[name = string("k_cache_13_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_13_cast_fp16 = slice_by_index(begin = k_cache_13_begin_0, end = k_cache_13_end_0, end_mask = k_cache_13_end_mask_0, squeeze_mask = k_cache_13_squeeze_mask_0, x = coreml_update_state_16)[name = string("k_cache_13_cast_fp16")]; tensor v_cache_13_begin_0 = const()[name = string("v_cache_13_begin_0"), val = tensor([3, 0, 0, 0])]; tensor v_cache_13_end_0 = const()[name = string("v_cache_13_end_0"), val = tensor([4, 1, 448, 512])]; tensor v_cache_13_end_mask_0 = const()[name = string("v_cache_13_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_13_squeeze_mask_0 = const()[name = string("v_cache_13_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_13_cast_fp16 = slice_by_index(begin = v_cache_13_begin_0, end = v_cache_13_end_0, end_mask = v_cache_13_end_mask_0, squeeze_mask = v_cache_13_squeeze_mask_0, x = coreml_update_state_17)[name = string("v_cache_13_cast_fp16")]; tensor k_cache_15_begin_0 = const()[name = string("k_cache_15_begin_0"), val = tensor([3, 0, 0, 0])]; tensor k_cache_15_end_0 = const()[name = string("k_cache_15_end_0"), val = tensor([4, 1, 1500, 512])]; tensor k_cache_15_end_mask_0 = const()[name = string("k_cache_15_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_15_squeeze_mask_0 = const()[name = string("k_cache_15_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_15_cast_fp16 = slice_by_index(begin = k_cache_15_begin_0, end = k_cache_15_end_0, end_mask = k_cache_15_end_mask_0, squeeze_mask = k_cache_15_squeeze_mask_0, x = read_state_2)[name = string("k_cache_15_cast_fp16")]; tensor v_cache_15_begin_0 = const()[name = string("v_cache_15_begin_0"), val = tensor([3, 0, 0, 0])]; tensor v_cache_15_end_0 = const()[name = string("v_cache_15_end_0"), val = tensor([4, 1, 1500, 512])]; tensor v_cache_15_end_mask_0 = const()[name = string("v_cache_15_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_15_squeeze_mask_0 = const()[name = string("v_cache_15_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_15_cast_fp16 = slice_by_index(begin = v_cache_15_begin_0, end = v_cache_15_end_0, end_mask = v_cache_15_end_mask_0, squeeze_mask = v_cache_15_squeeze_mask_0, x = read_state_3)[name = string("v_cache_15_cast_fp16")]; int32 var_709 = const()[name = string("op_709"), val = int32(-1)]; tensor var_727_axes_0 = const()[name = string("op_727_axes_0"), val = tensor([-1])]; tensor blocks_3_attn_ln_weight_to_fp16 = const()[name = string("blocks_3_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77580608)))]; tensor blocks_3_attn_ln_bias_to_fp16 = const()[name = string("blocks_3_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77581696)))]; fp16 var_715_to_fp16 = const()[name = string("op_715_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_727_cast_fp16 = layer_norm(axes = var_727_axes_0, beta = blocks_3_attn_ln_bias_to_fp16, epsilon = var_715_to_fp16, gamma = blocks_3_attn_ln_weight_to_fp16, x = x_57_cast_fp16)[name = string("op_727_cast_fp16")]; tensor var_738_to_fp16 = const()[name = string("op_738_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77582784)))]; tensor var_739_to_fp16 = const()[name = string("op_739_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78107136)))]; tensor linear_24_cast_fp16 = linear(bias = var_739_to_fp16, weight = var_738_to_fp16, x = var_727_cast_fp16)[name = string("linear_24_cast_fp16")]; tensor var_742_to_fp16 = const()[name = string("op_742_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78108224)))]; tensor linear_25_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_742_to_fp16, x = var_727_cast_fp16)[name = string("linear_25_cast_fp16")]; tensor var_746_to_fp16 = const()[name = string("op_746_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78632576)))]; tensor var_747_to_fp16 = const()[name = string("op_747_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79156928)))]; tensor linear_26_cast_fp16 = linear(bias = var_747_to_fp16, weight = var_746_to_fp16, x = var_727_cast_fp16)[name = string("linear_26_cast_fp16")]; tensor var_749_shape_cast_fp16 = shape(x = linear_24_cast_fp16)[name = string("op_749_shape_cast_fp16")]; int32 gather_38_axis_0 = const()[name = string("gather_38_axis_0"), val = int32(0)]; int32 gather_38_batch_dims_0 = const()[name = string("gather_38_batch_dims_0"), val = int32(0)]; bool gather_38_validate_indices_0 = const()[name = string("gather_38_validate_indices_0"), val = bool(false)]; string var_749_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_749_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_38_to_uint16 = const()[name = string("select_38_to_uint16"), val = uint16(1)]; tensor var_749_shape_cast_fp16_to_uint16 = cast(dtype = var_749_shape_cast_fp16_to_uint16_dtype_0, x = var_749_shape_cast_fp16)[name = string("cast_72")]; uint16 gather_38_cast_uint16 = gather(axis = gather_38_axis_0, batch_dims = gather_38_batch_dims_0, indices = select_38_to_uint16, validate_indices = gather_38_validate_indices_0, x = var_749_shape_cast_fp16_to_uint16)[name = string("gather_38_cast_uint16")]; string gather_38_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_38_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_38_cast_uint16_to_int32 = cast(dtype = gather_38_cast_uint16_to_int32_dtype_0, x = gather_38_cast_uint16)[name = string("cast_71")]; int32 end_step_9 = add(x = offset, y = gather_38_cast_uint16_to_int32)[name = string("end_step_9")]; tensor expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor([0])]; tensor expand_dims_50 = const()[name = string("expand_dims_50"), val = tensor([0])]; tensor expand_dims_51_axes_0 = const()[name = string("expand_dims_51_axes_0"), val = tensor([0])]; tensor expand_dims_51 = expand_dims(axes = expand_dims_51_axes_0, x = end_step_9)[name = string("expand_dims_51")]; tensor concat_70_values0_0 = const()[name = string("concat_70_values0_0"), val = tensor([3])]; int32 concat_70_axis_0 = const()[name = string("concat_70_axis_0"), val = int32(0)]; bool concat_70_interleave_0 = const()[name = string("concat_70_interleave_0"), val = bool(false)]; tensor concat_70 = concat(axis = concat_70_axis_0, interleave = concat_70_interleave_0, values = (concat_70_values0_0, expand_dims_48, expand_dims_1, expand_dims_50))[name = string("concat_70")]; tensor concat_71_values0_0 = const()[name = string("concat_71_values0_0"), val = tensor([0])]; tensor concat_71_values1_0 = const()[name = string("concat_71_values1_0"), val = tensor([0])]; tensor concat_71_values3_0 = const()[name = string("concat_71_values3_0"), val = tensor([0])]; int32 concat_71_axis_0 = const()[name = string("concat_71_axis_0"), val = int32(0)]; bool concat_71_interleave_0 = const()[name = string("concat_71_interleave_0"), val = bool(false)]; tensor concat_71 = concat(axis = concat_71_axis_0, interleave = concat_71_interleave_0, values = (concat_71_values0_0, concat_71_values1_0, expand_dims_51, concat_71_values3_0))[name = string("concat_71")]; tensor k_cache1_internal_tensor_assign_4_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_4_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_4_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_70, begin_mask = k_cache1_internal_tensor_assign_4_begin_mask_0, end = concat_71, end_mask = k_cache1_internal_tensor_assign_4_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_4_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_4_stride_0, update = linear_25_cast_fp16, x = coreml_update_state_16)[name = string("k_cache1_internal_tensor_assign_4_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_4_cast_fp16, input = k_cache1)[name = string("coreml_update_state_18_write_state")]; tensor coreml_update_state_18 = read_state(input = k_cache1)[name = string("coreml_update_state_18")]; tensor v_cache1_internal_tensor_assign_4_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_4_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_4_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_70, begin_mask = v_cache1_internal_tensor_assign_4_begin_mask_0, end = concat_71, end_mask = v_cache1_internal_tensor_assign_4_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_4_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_4_stride_0, update = linear_26_cast_fp16, x = coreml_update_state_17)[name = string("v_cache1_internal_tensor_assign_4_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_4_cast_fp16, input = v_cache1)[name = string("coreml_update_state_19_write_state")]; tensor coreml_update_state_19 = read_state(input = v_cache1)[name = string("coreml_update_state_19")]; int32 concat_76_values0_0 = const()[name = string("concat_76_values0_0"), val = int32(1)]; int32 concat_76_values2_0 = const()[name = string("concat_76_values2_0"), val = int32(512)]; int32 concat_76_axis_0 = const()[name = string("concat_76_axis_0"), val = int32(0)]; bool concat_76_interleave_0 = const()[name = string("concat_76_interleave_0"), val = bool(false)]; tensor concat_76 = concat(axis = concat_76_axis_0, interleave = concat_76_interleave_0, values = (concat_76_values0_0, end_step_9, concat_76_values2_0))[name = string("concat_76")]; tensor var_765_begin_0 = const()[name = string("op_765_begin_0"), val = tensor([0, 0, 0])]; tensor var_765_end_mask_0 = const()[name = string("op_765_end_mask_0"), val = tensor([true, false, true])]; tensor var_765_cast_fp16 = slice_by_index(begin = var_765_begin_0, end = concat_76, end_mask = var_765_end_mask_0, x = k_cache_13_cast_fp16)[name = string("op_765_cast_fp16")]; tensor var_768_begin_0 = const()[name = string("op_768_begin_0"), val = tensor([0, 0, 0])]; tensor var_768_end_mask_0 = const()[name = string("op_768_end_mask_0"), val = tensor([true, false, true])]; tensor var_768_cast_fp16 = slice_by_index(begin = var_768_begin_0, end = concat_76, end_mask = var_768_end_mask_0, x = v_cache_13_cast_fp16)[name = string("op_768_cast_fp16")]; tensor concat_78x = const()[name = string("concat_78x"), val = tensor([1, -1, 8, 64])]; tensor var_778_cast_fp16 = reshape(shape = concat_78x, x = linear_24_cast_fp16)[name = string("op_778_cast_fp16")]; tensor const_42_to_fp16 = const()[name = string("const_42_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_27_cast_fp16 = mul(x = var_778_cast_fp16, y = const_42_to_fp16)[name = string("q_27_cast_fp16")]; tensor concat_79x = const()[name = string("concat_79x"), val = tensor([1, -1, 8, 64])]; tensor var_785_cast_fp16 = reshape(shape = concat_79x, x = var_765_cast_fp16)[name = string("op_785_cast_fp16")]; tensor const_43_to_fp16 = const()[name = string("const_43_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_35_cast_fp16 = mul(x = var_785_cast_fp16, y = const_43_to_fp16)[name = string("k_35_cast_fp16")]; tensor concat_80x = const()[name = string("concat_80x"), val = tensor([1, -1, 8, 64])]; tensor var_792_cast_fp16 = reshape(shape = concat_80x, x = var_768_cast_fp16)[name = string("op_792_cast_fp16")]; tensor var_793 = const()[name = string("op_793"), val = tensor([0, 2, 1, 3])]; bool qk_19_transpose_x_0 = const()[name = string("qk_19_transpose_x_0"), val = bool(false)]; bool qk_19_transpose_y_0 = const()[name = string("qk_19_transpose_y_0"), val = bool(false)]; tensor transpose_61_perm_0 = const()[name = string("transpose_61_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_62_perm_0 = const()[name = string("transpose_62_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_62 = transpose(perm = transpose_62_perm_0, x = k_35_cast_fp16)[name = string("transpose_94")]; tensor transpose_61 = transpose(perm = transpose_61_perm_0, x = q_27_cast_fp16)[name = string("transpose_95")]; tensor qk_19_cast_fp16 = matmul(transpose_x = qk_19_transpose_x_0, transpose_y = qk_19_transpose_y_0, x = transpose_61, y = transpose_62)[name = string("qk_19_cast_fp16")]; int32 concat_81_values1_0 = const()[name = string("concat_81_values1_0"), val = int32(448)]; int32 concat_81_axis_0 = const()[name = string("concat_81_axis_0"), val = int32(0)]; bool concat_81_interleave_0 = const()[name = string("concat_81_interleave_0"), val = bool(false)]; tensor concat_81 = concat(axis = concat_81_axis_0, interleave = concat_81_interleave_0, values = (gather_38_cast_uint16_to_int32, concat_81_values1_0))[name = string("concat_81")]; tensor var_796_begin_0 = const()[name = string("op_796_begin_0"), val = tensor([0, 0])]; tensor var_796_end_mask_0 = const()[name = string("op_796_end_mask_0"), val = tensor([false, true])]; tensor var_796_cast_fp16 = slice_by_index(begin = var_796_begin_0, end = concat_81, end_mask = var_796_end_mask_0, x = mask_to_fp16)[name = string("op_796_cast_fp16")]; int32 concat_82_values0_0 = const()[name = string("concat_82_values0_0"), val = int32(0)]; int32 concat_82_axis_0 = const()[name = string("concat_82_axis_0"), val = int32(0)]; bool concat_82_interleave_0 = const()[name = string("concat_82_interleave_0"), val = bool(false)]; tensor concat_82 = concat(axis = concat_82_axis_0, interleave = concat_82_interleave_0, values = (concat_82_values0_0, gather_38_cast_uint16_to_int32))[name = string("concat_82")]; tensor var_797_begin_0 = const()[name = string("op_797_begin_0"), val = tensor([0, 0])]; tensor var_797_end_mask_0 = const()[name = string("op_797_end_mask_0"), val = tensor([true, false])]; tensor var_797_cast_fp16 = slice_by_index(begin = var_797_begin_0, end = concat_82, end_mask = var_797_end_mask_0, x = var_796_cast_fp16)[name = string("op_797_cast_fp16")]; tensor qk_21_cast_fp16 = add(x = qk_19_cast_fp16, y = var_797_cast_fp16)[name = string("qk_21_cast_fp16")]; tensor var_800_cast_fp16 = softmax(axis = var_709, x = qk_21_cast_fp16)[name = string("op_800_cast_fp16")]; bool var_802_transpose_x_0 = const()[name = string("op_802_transpose_x_0"), val = bool(false)]; bool var_802_transpose_y_0 = const()[name = string("op_802_transpose_y_0"), val = bool(false)]; tensor v_35_cast_fp16 = transpose(perm = var_793, x = var_792_cast_fp16)[name = string("transpose_96")]; tensor var_802_cast_fp16 = matmul(transpose_x = var_802_transpose_x_0, transpose_y = var_802_transpose_y_0, x = var_800_cast_fp16, y = v_35_cast_fp16)[name = string("op_802_cast_fp16")]; tensor var_803 = const()[name = string("op_803"), val = tensor([0, 2, 1, 3])]; tensor concat_83x = const()[name = string("concat_83x"), val = tensor([1, -1, 512])]; tensor var_804_cast_fp16 = transpose(perm = var_803, x = var_802_cast_fp16)[name = string("transpose_93")]; tensor x_61_cast_fp16 = reshape(shape = concat_83x, x = var_804_cast_fp16)[name = string("x_61_cast_fp16")]; tensor var_808_to_fp16 = const()[name = string("op_808_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79158016)))]; tensor var_809_to_fp16 = const()[name = string("op_809_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79682368)))]; tensor linear_27_cast_fp16 = linear(bias = var_809_to_fp16, weight = var_808_to_fp16, x = x_61_cast_fp16)[name = string("linear_27_cast_fp16")]; tensor x_63_cast_fp16 = add(x = x_57_cast_fp16, y = linear_27_cast_fp16)[name = string("x_63_cast_fp16")]; tensor var_816_axes_0 = const()[name = string("op_816_axes_0"), val = tensor([-1])]; tensor blocks_3_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_3_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79683456)))]; tensor blocks_3_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_3_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79684544)))]; tensor var_816_cast_fp16 = layer_norm(axes = var_816_axes_0, beta = blocks_3_cross_attn_ln_bias_to_fp16, epsilon = var_715_to_fp16, gamma = blocks_3_cross_attn_ln_weight_to_fp16, x = x_63_cast_fp16)[name = string("op_816_cast_fp16")]; tensor var_825_to_fp16 = const()[name = string("op_825_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79685632)))]; tensor var_826_to_fp16 = const()[name = string("op_826_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80209984)))]; tensor linear_28_cast_fp16 = linear(bias = var_826_to_fp16, weight = var_825_to_fp16, x = var_816_cast_fp16)[name = string("linear_28_cast_fp16")]; tensor concat_84 = const()[name = string("concat_84"), val = tensor([0, 0, 0])]; tensor concat_85 = const()[name = string("concat_85"), val = tensor([0, 1500, 0])]; tensor k_37_internal_tensor_assign_1_stride_0 = const()[name = string("k_37_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_37_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_37_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_37_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_37_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_84, begin_mask = k_37_internal_tensor_assign_1_begin_mask_0, end = concat_85, end_mask = k_37_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_37_internal_tensor_assign_1_squeeze_mask_0, stride = k_37_internal_tensor_assign_1_stride_0, update = k_cache_15_cast_fp16, x = k_7_to_fp16)[name = string("k_37_internal_tensor_assign_1_cast_fp16")]; tensor concat_86 = const()[name = string("concat_86"), val = tensor([0, 0, 0])]; tensor concat_87 = const()[name = string("concat_87"), val = tensor([0, 1500, 0])]; tensor v_37_internal_tensor_assign_1_stride_0 = const()[name = string("v_37_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_37_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_37_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_37_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_37_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_86, begin_mask = v_37_internal_tensor_assign_1_begin_mask_0, end = concat_87, end_mask = v_37_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_37_internal_tensor_assign_1_squeeze_mask_0, stride = v_37_internal_tensor_assign_1_stride_0, update = v_cache_15_cast_fp16, x = k_7_to_fp16)[name = string("v_37_internal_tensor_assign_1_cast_fp16")]; tensor concat_88x = const()[name = string("concat_88x"), val = tensor([1, -1, 8, 64])]; tensor var_846_cast_fp16 = reshape(shape = concat_88x, x = linear_28_cast_fp16)[name = string("op_846_cast_fp16")]; tensor const_44_to_fp16 = const()[name = string("const_44_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_31_cast_fp16 = mul(x = var_846_cast_fp16, y = const_44_to_fp16)[name = string("q_31_cast_fp16")]; tensor var_852 = const()[name = string("op_852"), val = tensor([1, 1500, 8, -1])]; tensor var_853_cast_fp16 = reshape(shape = var_852, x = k_37_internal_tensor_assign_1_cast_fp16)[name = string("op_853_cast_fp16")]; tensor const_45_to_fp16 = const()[name = string("const_45_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_39_cast_fp16 = mul(x = var_853_cast_fp16, y = const_45_to_fp16)[name = string("k_39_cast_fp16")]; tensor var_859 = const()[name = string("op_859"), val = tensor([1, 1500, 8, -1])]; tensor var_860_cast_fp16 = reshape(shape = var_859, x = v_37_internal_tensor_assign_1_cast_fp16)[name = string("op_860_cast_fp16")]; tensor var_861 = const()[name = string("op_861"), val = tensor([0, 2, 1, 3])]; bool qk_23_transpose_x_0 = const()[name = string("qk_23_transpose_x_0"), val = bool(false)]; bool qk_23_transpose_y_0 = const()[name = string("qk_23_transpose_y_0"), val = bool(false)]; tensor transpose_63_perm_0 = const()[name = string("transpose_63_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_64_perm_0 = const()[name = string("transpose_64_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_64 = transpose(perm = transpose_64_perm_0, x = k_39_cast_fp16)[name = string("transpose_90")]; tensor transpose_63 = transpose(perm = transpose_63_perm_0, x = q_31_cast_fp16)[name = string("transpose_91")]; tensor qk_23_cast_fp16 = matmul(transpose_x = qk_23_transpose_x_0, transpose_y = qk_23_transpose_y_0, x = transpose_63, y = transpose_64)[name = string("qk_23_cast_fp16")]; tensor var_865_cast_fp16 = softmax(axis = var_709, x = qk_23_cast_fp16)[name = string("op_865_cast_fp16")]; bool var_867_transpose_x_0 = const()[name = string("op_867_transpose_x_0"), val = bool(false)]; bool var_867_transpose_y_0 = const()[name = string("op_867_transpose_y_0"), val = bool(false)]; tensor v_39_cast_fp16 = transpose(perm = var_861, x = var_860_cast_fp16)[name = string("transpose_92")]; tensor var_867_cast_fp16 = matmul(transpose_x = var_867_transpose_x_0, transpose_y = var_867_transpose_y_0, x = var_865_cast_fp16, y = v_39_cast_fp16)[name = string("op_867_cast_fp16")]; tensor var_868 = const()[name = string("op_868"), val = tensor([0, 2, 1, 3])]; tensor concat_89x = const()[name = string("concat_89x"), val = tensor([1, -1, 512])]; tensor var_869_cast_fp16 = transpose(perm = var_868, x = var_867_cast_fp16)[name = string("transpose_89")]; tensor x_67_cast_fp16 = reshape(shape = concat_89x, x = var_869_cast_fp16)[name = string("x_67_cast_fp16")]; tensor var_873_to_fp16 = const()[name = string("op_873_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80211072)))]; tensor var_874_to_fp16 = const()[name = string("op_874_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80735424)))]; tensor linear_29_cast_fp16 = linear(bias = var_874_to_fp16, weight = var_873_to_fp16, x = x_67_cast_fp16)[name = string("linear_29_cast_fp16")]; tensor x_69_cast_fp16 = add(x = x_63_cast_fp16, y = linear_29_cast_fp16)[name = string("x_69_cast_fp16")]; tensor var_881_axes_0 = const()[name = string("op_881_axes_0"), val = tensor([-1])]; tensor blocks_3_mlp_ln_weight_to_fp16 = const()[name = string("blocks_3_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80736512)))]; tensor blocks_3_mlp_ln_bias_to_fp16 = const()[name = string("blocks_3_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80737600)))]; tensor var_881_cast_fp16 = layer_norm(axes = var_881_axes_0, beta = blocks_3_mlp_ln_bias_to_fp16, epsilon = var_715_to_fp16, gamma = blocks_3_mlp_ln_weight_to_fp16, x = x_69_cast_fp16)[name = string("op_881_cast_fp16")]; tensor var_890_to_fp16 = const()[name = string("op_890_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80738688)))]; tensor var_891_to_fp16 = const()[name = string("op_891_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82835904)))]; tensor linear_30_cast_fp16 = linear(bias = var_891_to_fp16, weight = var_890_to_fp16, x = var_881_cast_fp16)[name = string("linear_30_cast_fp16")]; string x_73_mode_0 = const()[name = string("x_73_mode_0"), val = string("EXACT")]; tensor x_73_cast_fp16 = gelu(mode = x_73_mode_0, x = linear_30_cast_fp16)[name = string("x_73_cast_fp16")]; tensor var_896_to_fp16 = const()[name = string("op_896_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82840064)))]; tensor var_897_to_fp16 = const()[name = string("op_897_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84937280)))]; tensor linear_31_cast_fp16 = linear(bias = var_897_to_fp16, weight = var_896_to_fp16, x = x_73_cast_fp16)[name = string("linear_31_cast_fp16")]; tensor x_75_cast_fp16 = add(x = x_69_cast_fp16, y = linear_31_cast_fp16)[name = string("x_75_cast_fp16")]; tensor k_cache_17_begin_0 = const()[name = string("k_cache_17_begin_0"), val = tensor([4, 0, 0, 0])]; tensor k_cache_17_end_0 = const()[name = string("k_cache_17_end_0"), val = tensor([5, 1, 448, 512])]; tensor k_cache_17_end_mask_0 = const()[name = string("k_cache_17_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_17_squeeze_mask_0 = const()[name = string("k_cache_17_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_17_cast_fp16 = slice_by_index(begin = k_cache_17_begin_0, end = k_cache_17_end_0, end_mask = k_cache_17_end_mask_0, squeeze_mask = k_cache_17_squeeze_mask_0, x = coreml_update_state_18)[name = string("k_cache_17_cast_fp16")]; tensor v_cache_17_begin_0 = const()[name = string("v_cache_17_begin_0"), val = tensor([4, 0, 0, 0])]; tensor v_cache_17_end_0 = const()[name = string("v_cache_17_end_0"), val = tensor([5, 1, 448, 512])]; tensor v_cache_17_end_mask_0 = const()[name = string("v_cache_17_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_17_squeeze_mask_0 = const()[name = string("v_cache_17_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_17_cast_fp16 = slice_by_index(begin = v_cache_17_begin_0, end = v_cache_17_end_0, end_mask = v_cache_17_end_mask_0, squeeze_mask = v_cache_17_squeeze_mask_0, x = coreml_update_state_19)[name = string("v_cache_17_cast_fp16")]; tensor k_cache_19_begin_0 = const()[name = string("k_cache_19_begin_0"), val = tensor([4, 0, 0, 0])]; tensor k_cache_19_end_0 = const()[name = string("k_cache_19_end_0"), val = tensor([5, 1, 1500, 512])]; tensor k_cache_19_end_mask_0 = const()[name = string("k_cache_19_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_19_squeeze_mask_0 = const()[name = string("k_cache_19_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_19_cast_fp16 = slice_by_index(begin = k_cache_19_begin_0, end = k_cache_19_end_0, end_mask = k_cache_19_end_mask_0, squeeze_mask = k_cache_19_squeeze_mask_0, x = read_state_2)[name = string("k_cache_19_cast_fp16")]; tensor v_cache_19_begin_0 = const()[name = string("v_cache_19_begin_0"), val = tensor([4, 0, 0, 0])]; tensor v_cache_19_end_0 = const()[name = string("v_cache_19_end_0"), val = tensor([5, 1, 1500, 512])]; tensor v_cache_19_end_mask_0 = const()[name = string("v_cache_19_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_19_squeeze_mask_0 = const()[name = string("v_cache_19_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_19_cast_fp16 = slice_by_index(begin = v_cache_19_begin_0, end = v_cache_19_end_0, end_mask = v_cache_19_end_mask_0, squeeze_mask = v_cache_19_squeeze_mask_0, x = read_state_3)[name = string("v_cache_19_cast_fp16")]; int32 var_920 = const()[name = string("op_920"), val = int32(-1)]; tensor var_938_axes_0 = const()[name = string("op_938_axes_0"), val = tensor([-1])]; tensor blocks_4_attn_ln_weight_to_fp16 = const()[name = string("blocks_4_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84938368)))]; tensor blocks_4_attn_ln_bias_to_fp16 = const()[name = string("blocks_4_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84939456)))]; fp16 var_926_to_fp16 = const()[name = string("op_926_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_938_cast_fp16 = layer_norm(axes = var_938_axes_0, beta = blocks_4_attn_ln_bias_to_fp16, epsilon = var_926_to_fp16, gamma = blocks_4_attn_ln_weight_to_fp16, x = x_75_cast_fp16)[name = string("op_938_cast_fp16")]; tensor var_949_to_fp16 = const()[name = string("op_949_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84940544)))]; tensor var_950_to_fp16 = const()[name = string("op_950_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85464896)))]; tensor linear_32_cast_fp16 = linear(bias = var_950_to_fp16, weight = var_949_to_fp16, x = var_938_cast_fp16)[name = string("linear_32_cast_fp16")]; tensor var_953_to_fp16 = const()[name = string("op_953_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85465984)))]; tensor linear_33_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_953_to_fp16, x = var_938_cast_fp16)[name = string("linear_33_cast_fp16")]; tensor var_957_to_fp16 = const()[name = string("op_957_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85990336)))]; tensor var_958_to_fp16 = const()[name = string("op_958_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86514688)))]; tensor linear_34_cast_fp16 = linear(bias = var_958_to_fp16, weight = var_957_to_fp16, x = var_938_cast_fp16)[name = string("linear_34_cast_fp16")]; tensor var_960_shape_cast_fp16 = shape(x = linear_32_cast_fp16)[name = string("op_960_shape_cast_fp16")]; int32 gather_50_axis_0 = const()[name = string("gather_50_axis_0"), val = int32(0)]; int32 gather_50_batch_dims_0 = const()[name = string("gather_50_batch_dims_0"), val = int32(0)]; bool gather_50_validate_indices_0 = const()[name = string("gather_50_validate_indices_0"), val = bool(false)]; string var_960_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_960_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_50_to_uint16 = const()[name = string("select_50_to_uint16"), val = uint16(1)]; tensor var_960_shape_cast_fp16_to_uint16 = cast(dtype = var_960_shape_cast_fp16_to_uint16_dtype_0, x = var_960_shape_cast_fp16)[name = string("cast_70")]; uint16 gather_50_cast_uint16 = gather(axis = gather_50_axis_0, batch_dims = gather_50_batch_dims_0, indices = select_50_to_uint16, validate_indices = gather_50_validate_indices_0, x = var_960_shape_cast_fp16_to_uint16)[name = string("gather_50_cast_uint16")]; string gather_50_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_50_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_50_cast_uint16_to_int32 = cast(dtype = gather_50_cast_uint16_to_int32_dtype_0, x = gather_50_cast_uint16)[name = string("cast_69")]; int32 end_step_11 = add(x = offset, y = gather_50_cast_uint16_to_int32)[name = string("end_step_11")]; tensor expand_dims_64 = const()[name = string("expand_dims_64"), val = tensor([0])]; tensor expand_dims_66 = const()[name = string("expand_dims_66"), val = tensor([0])]; tensor expand_dims_67_axes_0 = const()[name = string("expand_dims_67_axes_0"), val = tensor([0])]; tensor expand_dims_67 = expand_dims(axes = expand_dims_67_axes_0, x = end_step_11)[name = string("expand_dims_67")]; tensor concat_92_values0_0 = const()[name = string("concat_92_values0_0"), val = tensor([4])]; int32 concat_92_axis_0 = const()[name = string("concat_92_axis_0"), val = int32(0)]; bool concat_92_interleave_0 = const()[name = string("concat_92_interleave_0"), val = bool(false)]; tensor concat_92 = concat(axis = concat_92_axis_0, interleave = concat_92_interleave_0, values = (concat_92_values0_0, expand_dims_64, expand_dims_1, expand_dims_66))[name = string("concat_92")]; tensor concat_93_values0_0 = const()[name = string("concat_93_values0_0"), val = tensor([0])]; tensor concat_93_values1_0 = const()[name = string("concat_93_values1_0"), val = tensor([0])]; tensor concat_93_values3_0 = const()[name = string("concat_93_values3_0"), val = tensor([0])]; int32 concat_93_axis_0 = const()[name = string("concat_93_axis_0"), val = int32(0)]; bool concat_93_interleave_0 = const()[name = string("concat_93_interleave_0"), val = bool(false)]; tensor concat_93 = concat(axis = concat_93_axis_0, interleave = concat_93_interleave_0, values = (concat_93_values0_0, concat_93_values1_0, expand_dims_67, concat_93_values3_0))[name = string("concat_93")]; tensor k_cache1_internal_tensor_assign_5_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_5_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_5_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_92, begin_mask = k_cache1_internal_tensor_assign_5_begin_mask_0, end = concat_93, end_mask = k_cache1_internal_tensor_assign_5_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_5_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_5_stride_0, update = linear_33_cast_fp16, x = coreml_update_state_18)[name = string("k_cache1_internal_tensor_assign_5_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_5_cast_fp16, input = k_cache1)[name = string("coreml_update_state_20_write_state")]; tensor coreml_update_state_20 = read_state(input = k_cache1)[name = string("coreml_update_state_20")]; tensor v_cache1_internal_tensor_assign_5_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_5_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_5_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_92, begin_mask = v_cache1_internal_tensor_assign_5_begin_mask_0, end = concat_93, end_mask = v_cache1_internal_tensor_assign_5_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_5_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_5_stride_0, update = linear_34_cast_fp16, x = coreml_update_state_19)[name = string("v_cache1_internal_tensor_assign_5_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_5_cast_fp16, input = v_cache1)[name = string("coreml_update_state_21_write_state")]; tensor coreml_update_state_21 = read_state(input = v_cache1)[name = string("coreml_update_state_21")]; int32 concat_98_values0_0 = const()[name = string("concat_98_values0_0"), val = int32(1)]; int32 concat_98_values2_0 = const()[name = string("concat_98_values2_0"), val = int32(512)]; int32 concat_98_axis_0 = const()[name = string("concat_98_axis_0"), val = int32(0)]; bool concat_98_interleave_0 = const()[name = string("concat_98_interleave_0"), val = bool(false)]; tensor concat_98 = concat(axis = concat_98_axis_0, interleave = concat_98_interleave_0, values = (concat_98_values0_0, end_step_11, concat_98_values2_0))[name = string("concat_98")]; tensor var_976_begin_0 = const()[name = string("op_976_begin_0"), val = tensor([0, 0, 0])]; tensor var_976_end_mask_0 = const()[name = string("op_976_end_mask_0"), val = tensor([true, false, true])]; tensor var_976_cast_fp16 = slice_by_index(begin = var_976_begin_0, end = concat_98, end_mask = var_976_end_mask_0, x = k_cache_17_cast_fp16)[name = string("op_976_cast_fp16")]; tensor var_979_begin_0 = const()[name = string("op_979_begin_0"), val = tensor([0, 0, 0])]; tensor var_979_end_mask_0 = const()[name = string("op_979_end_mask_0"), val = tensor([true, false, true])]; tensor var_979_cast_fp16 = slice_by_index(begin = var_979_begin_0, end = concat_98, end_mask = var_979_end_mask_0, x = v_cache_17_cast_fp16)[name = string("op_979_cast_fp16")]; tensor concat_100x = const()[name = string("concat_100x"), val = tensor([1, -1, 8, 64])]; tensor var_989_cast_fp16 = reshape(shape = concat_100x, x = linear_32_cast_fp16)[name = string("op_989_cast_fp16")]; tensor const_46_to_fp16 = const()[name = string("const_46_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_35_cast_fp16 = mul(x = var_989_cast_fp16, y = const_46_to_fp16)[name = string("q_35_cast_fp16")]; tensor concat_101x = const()[name = string("concat_101x"), val = tensor([1, -1, 8, 64])]; tensor var_996_cast_fp16 = reshape(shape = concat_101x, x = var_976_cast_fp16)[name = string("op_996_cast_fp16")]; tensor const_47_to_fp16 = const()[name = string("const_47_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_45_cast_fp16 = mul(x = var_996_cast_fp16, y = const_47_to_fp16)[name = string("k_45_cast_fp16")]; tensor concat_102x = const()[name = string("concat_102x"), val = tensor([1, -1, 8, 64])]; tensor var_1003_cast_fp16 = reshape(shape = concat_102x, x = var_979_cast_fp16)[name = string("op_1003_cast_fp16")]; tensor var_1004 = const()[name = string("op_1004"), val = tensor([0, 2, 1, 3])]; bool qk_25_transpose_x_0 = const()[name = string("qk_25_transpose_x_0"), val = bool(false)]; bool qk_25_transpose_y_0 = const()[name = string("qk_25_transpose_y_0"), val = bool(false)]; tensor transpose_65_perm_0 = const()[name = string("transpose_65_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_66_perm_0 = const()[name = string("transpose_66_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_66 = transpose(perm = transpose_66_perm_0, x = k_45_cast_fp16)[name = string("transpose_86")]; tensor transpose_65 = transpose(perm = transpose_65_perm_0, x = q_35_cast_fp16)[name = string("transpose_87")]; tensor qk_25_cast_fp16 = matmul(transpose_x = qk_25_transpose_x_0, transpose_y = qk_25_transpose_y_0, x = transpose_65, y = transpose_66)[name = string("qk_25_cast_fp16")]; int32 concat_103_values1_0 = const()[name = string("concat_103_values1_0"), val = int32(448)]; int32 concat_103_axis_0 = const()[name = string("concat_103_axis_0"), val = int32(0)]; bool concat_103_interleave_0 = const()[name = string("concat_103_interleave_0"), val = bool(false)]; tensor concat_103 = concat(axis = concat_103_axis_0, interleave = concat_103_interleave_0, values = (gather_50_cast_uint16_to_int32, concat_103_values1_0))[name = string("concat_103")]; tensor var_1007_begin_0 = const()[name = string("op_1007_begin_0"), val = tensor([0, 0])]; tensor var_1007_end_mask_0 = const()[name = string("op_1007_end_mask_0"), val = tensor([false, true])]; tensor var_1007_cast_fp16 = slice_by_index(begin = var_1007_begin_0, end = concat_103, end_mask = var_1007_end_mask_0, x = mask_to_fp16)[name = string("op_1007_cast_fp16")]; int32 concat_104_values0_0 = const()[name = string("concat_104_values0_0"), val = int32(0)]; int32 concat_104_axis_0 = const()[name = string("concat_104_axis_0"), val = int32(0)]; bool concat_104_interleave_0 = const()[name = string("concat_104_interleave_0"), val = bool(false)]; tensor concat_104 = concat(axis = concat_104_axis_0, interleave = concat_104_interleave_0, values = (concat_104_values0_0, gather_50_cast_uint16_to_int32))[name = string("concat_104")]; tensor var_1008_begin_0 = const()[name = string("op_1008_begin_0"), val = tensor([0, 0])]; tensor var_1008_end_mask_0 = const()[name = string("op_1008_end_mask_0"), val = tensor([true, false])]; tensor var_1008_cast_fp16 = slice_by_index(begin = var_1008_begin_0, end = concat_104, end_mask = var_1008_end_mask_0, x = var_1007_cast_fp16)[name = string("op_1008_cast_fp16")]; tensor qk_27_cast_fp16 = add(x = qk_25_cast_fp16, y = var_1008_cast_fp16)[name = string("qk_27_cast_fp16")]; tensor var_1011_cast_fp16 = softmax(axis = var_920, x = qk_27_cast_fp16)[name = string("op_1011_cast_fp16")]; bool var_1013_transpose_x_0 = const()[name = string("op_1013_transpose_x_0"), val = bool(false)]; bool var_1013_transpose_y_0 = const()[name = string("op_1013_transpose_y_0"), val = bool(false)]; tensor v_45_cast_fp16 = transpose(perm = var_1004, x = var_1003_cast_fp16)[name = string("transpose_88")]; tensor var_1013_cast_fp16 = matmul(transpose_x = var_1013_transpose_x_0, transpose_y = var_1013_transpose_y_0, x = var_1011_cast_fp16, y = v_45_cast_fp16)[name = string("op_1013_cast_fp16")]; tensor var_1014 = const()[name = string("op_1014"), val = tensor([0, 2, 1, 3])]; tensor concat_105x = const()[name = string("concat_105x"), val = tensor([1, -1, 512])]; tensor var_1015_cast_fp16 = transpose(perm = var_1014, x = var_1013_cast_fp16)[name = string("transpose_85")]; tensor x_79_cast_fp16 = reshape(shape = concat_105x, x = var_1015_cast_fp16)[name = string("x_79_cast_fp16")]; tensor var_1019_to_fp16 = const()[name = string("op_1019_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86515776)))]; tensor var_1020_to_fp16 = const()[name = string("op_1020_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87040128)))]; tensor linear_35_cast_fp16 = linear(bias = var_1020_to_fp16, weight = var_1019_to_fp16, x = x_79_cast_fp16)[name = string("linear_35_cast_fp16")]; tensor x_81_cast_fp16 = add(x = x_75_cast_fp16, y = linear_35_cast_fp16)[name = string("x_81_cast_fp16")]; tensor var_1027_axes_0 = const()[name = string("op_1027_axes_0"), val = tensor([-1])]; tensor blocks_4_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_4_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87041216)))]; tensor blocks_4_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_4_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87042304)))]; tensor var_1027_cast_fp16 = layer_norm(axes = var_1027_axes_0, beta = blocks_4_cross_attn_ln_bias_to_fp16, epsilon = var_926_to_fp16, gamma = blocks_4_cross_attn_ln_weight_to_fp16, x = x_81_cast_fp16)[name = string("op_1027_cast_fp16")]; tensor var_1036_to_fp16 = const()[name = string("op_1036_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87043392)))]; tensor var_1037_to_fp16 = const()[name = string("op_1037_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87567744)))]; tensor linear_36_cast_fp16 = linear(bias = var_1037_to_fp16, weight = var_1036_to_fp16, x = var_1027_cast_fp16)[name = string("linear_36_cast_fp16")]; tensor concat_106 = const()[name = string("concat_106"), val = tensor([0, 0, 0])]; tensor concat_107 = const()[name = string("concat_107"), val = tensor([0, 1500, 0])]; tensor k_47_internal_tensor_assign_1_stride_0 = const()[name = string("k_47_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_47_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_47_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_47_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_47_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_47_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_47_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_47_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_106, begin_mask = k_47_internal_tensor_assign_1_begin_mask_0, end = concat_107, end_mask = k_47_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_47_internal_tensor_assign_1_squeeze_mask_0, stride = k_47_internal_tensor_assign_1_stride_0, update = k_cache_19_cast_fp16, x = k_7_to_fp16)[name = string("k_47_internal_tensor_assign_1_cast_fp16")]; tensor concat_108 = const()[name = string("concat_108"), val = tensor([0, 0, 0])]; tensor concat_109 = const()[name = string("concat_109"), val = tensor([0, 1500, 0])]; tensor v_47_internal_tensor_assign_1_stride_0 = const()[name = string("v_47_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_47_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_47_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_47_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_47_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_47_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_47_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_47_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_108, begin_mask = v_47_internal_tensor_assign_1_begin_mask_0, end = concat_109, end_mask = v_47_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_47_internal_tensor_assign_1_squeeze_mask_0, stride = v_47_internal_tensor_assign_1_stride_0, update = v_cache_19_cast_fp16, x = k_7_to_fp16)[name = string("v_47_internal_tensor_assign_1_cast_fp16")]; tensor concat_110x = const()[name = string("concat_110x"), val = tensor([1, -1, 8, 64])]; tensor var_1057_cast_fp16 = reshape(shape = concat_110x, x = linear_36_cast_fp16)[name = string("op_1057_cast_fp16")]; tensor const_48_to_fp16 = const()[name = string("const_48_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_39_cast_fp16 = mul(x = var_1057_cast_fp16, y = const_48_to_fp16)[name = string("q_39_cast_fp16")]; tensor var_1063 = const()[name = string("op_1063"), val = tensor([1, 1500, 8, -1])]; tensor var_1064_cast_fp16 = reshape(shape = var_1063, x = k_47_internal_tensor_assign_1_cast_fp16)[name = string("op_1064_cast_fp16")]; tensor const_49_to_fp16 = const()[name = string("const_49_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_49_cast_fp16 = mul(x = var_1064_cast_fp16, y = const_49_to_fp16)[name = string("k_49_cast_fp16")]; tensor var_1070 = const()[name = string("op_1070"), val = tensor([1, 1500, 8, -1])]; tensor var_1071_cast_fp16 = reshape(shape = var_1070, x = v_47_internal_tensor_assign_1_cast_fp16)[name = string("op_1071_cast_fp16")]; tensor var_1072 = const()[name = string("op_1072"), val = tensor([0, 2, 1, 3])]; bool qk_29_transpose_x_0 = const()[name = string("qk_29_transpose_x_0"), val = bool(false)]; bool qk_29_transpose_y_0 = const()[name = string("qk_29_transpose_y_0"), val = bool(false)]; tensor transpose_67_perm_0 = const()[name = string("transpose_67_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_68_perm_0 = const()[name = string("transpose_68_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_68 = transpose(perm = transpose_68_perm_0, x = k_49_cast_fp16)[name = string("transpose_82")]; tensor transpose_67 = transpose(perm = transpose_67_perm_0, x = q_39_cast_fp16)[name = string("transpose_83")]; tensor qk_29_cast_fp16 = matmul(transpose_x = qk_29_transpose_x_0, transpose_y = qk_29_transpose_y_0, x = transpose_67, y = transpose_68)[name = string("qk_29_cast_fp16")]; tensor var_1076_cast_fp16 = softmax(axis = var_920, x = qk_29_cast_fp16)[name = string("op_1076_cast_fp16")]; bool var_1078_transpose_x_0 = const()[name = string("op_1078_transpose_x_0"), val = bool(false)]; bool var_1078_transpose_y_0 = const()[name = string("op_1078_transpose_y_0"), val = bool(false)]; tensor v_49_cast_fp16 = transpose(perm = var_1072, x = var_1071_cast_fp16)[name = string("transpose_84")]; tensor var_1078_cast_fp16 = matmul(transpose_x = var_1078_transpose_x_0, transpose_y = var_1078_transpose_y_0, x = var_1076_cast_fp16, y = v_49_cast_fp16)[name = string("op_1078_cast_fp16")]; tensor var_1079 = const()[name = string("op_1079"), val = tensor([0, 2, 1, 3])]; tensor concat_111x = const()[name = string("concat_111x"), val = tensor([1, -1, 512])]; tensor var_1080_cast_fp16 = transpose(perm = var_1079, x = var_1078_cast_fp16)[name = string("transpose_81")]; tensor x_85_cast_fp16 = reshape(shape = concat_111x, x = var_1080_cast_fp16)[name = string("x_85_cast_fp16")]; tensor var_1084_to_fp16 = const()[name = string("op_1084_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87568832)))]; tensor var_1085_to_fp16 = const()[name = string("op_1085_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88093184)))]; tensor linear_37_cast_fp16 = linear(bias = var_1085_to_fp16, weight = var_1084_to_fp16, x = x_85_cast_fp16)[name = string("linear_37_cast_fp16")]; tensor x_87_cast_fp16 = add(x = x_81_cast_fp16, y = linear_37_cast_fp16)[name = string("x_87_cast_fp16")]; tensor var_1092_axes_0 = const()[name = string("op_1092_axes_0"), val = tensor([-1])]; tensor blocks_4_mlp_ln_weight_to_fp16 = const()[name = string("blocks_4_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88094272)))]; tensor blocks_4_mlp_ln_bias_to_fp16 = const()[name = string("blocks_4_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88095360)))]; tensor var_1092_cast_fp16 = layer_norm(axes = var_1092_axes_0, beta = blocks_4_mlp_ln_bias_to_fp16, epsilon = var_926_to_fp16, gamma = blocks_4_mlp_ln_weight_to_fp16, x = x_87_cast_fp16)[name = string("op_1092_cast_fp16")]; tensor var_1101_to_fp16 = const()[name = string("op_1101_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88096448)))]; tensor var_1102_to_fp16 = const()[name = string("op_1102_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90193664)))]; tensor linear_38_cast_fp16 = linear(bias = var_1102_to_fp16, weight = var_1101_to_fp16, x = var_1092_cast_fp16)[name = string("linear_38_cast_fp16")]; string x_91_mode_0 = const()[name = string("x_91_mode_0"), val = string("EXACT")]; tensor x_91_cast_fp16 = gelu(mode = x_91_mode_0, x = linear_38_cast_fp16)[name = string("x_91_cast_fp16")]; tensor var_1107_to_fp16 = const()[name = string("op_1107_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90197824)))]; tensor var_1108_to_fp16 = const()[name = string("op_1108_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92295040)))]; tensor linear_39_cast_fp16 = linear(bias = var_1108_to_fp16, weight = var_1107_to_fp16, x = x_91_cast_fp16)[name = string("linear_39_cast_fp16")]; tensor x_93_cast_fp16 = add(x = x_87_cast_fp16, y = linear_39_cast_fp16)[name = string("x_93_cast_fp16")]; tensor k_cache_21_begin_0 = const()[name = string("k_cache_21_begin_0"), val = tensor([5, 0, 0, 0])]; tensor k_cache_21_end_0 = const()[name = string("k_cache_21_end_0"), val = tensor([6, 1, 448, 512])]; tensor k_cache_21_end_mask_0 = const()[name = string("k_cache_21_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_21_squeeze_mask_0 = const()[name = string("k_cache_21_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_21_cast_fp16 = slice_by_index(begin = k_cache_21_begin_0, end = k_cache_21_end_0, end_mask = k_cache_21_end_mask_0, squeeze_mask = k_cache_21_squeeze_mask_0, x = coreml_update_state_20)[name = string("k_cache_21_cast_fp16")]; tensor v_cache_21_begin_0 = const()[name = string("v_cache_21_begin_0"), val = tensor([5, 0, 0, 0])]; tensor v_cache_21_end_0 = const()[name = string("v_cache_21_end_0"), val = tensor([6, 1, 448, 512])]; tensor v_cache_21_end_mask_0 = const()[name = string("v_cache_21_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_21_squeeze_mask_0 = const()[name = string("v_cache_21_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_21_cast_fp16 = slice_by_index(begin = v_cache_21_begin_0, end = v_cache_21_end_0, end_mask = v_cache_21_end_mask_0, squeeze_mask = v_cache_21_squeeze_mask_0, x = coreml_update_state_21)[name = string("v_cache_21_cast_fp16")]; tensor k_cache_begin_0 = const()[name = string("k_cache_begin_0"), val = tensor([5, 0, 0, 0])]; tensor k_cache_end_0 = const()[name = string("k_cache_end_0"), val = tensor([6, 1, 1500, 512])]; tensor k_cache_end_mask_0 = const()[name = string("k_cache_end_mask_0"), val = tensor([false, true, true, true])]; tensor k_cache_squeeze_mask_0 = const()[name = string("k_cache_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache_cast_fp16 = slice_by_index(begin = k_cache_begin_0, end = k_cache_end_0, end_mask = k_cache_end_mask_0, squeeze_mask = k_cache_squeeze_mask_0, x = read_state_2)[name = string("k_cache_cast_fp16")]; tensor v_cache_begin_0 = const()[name = string("v_cache_begin_0"), val = tensor([5, 0, 0, 0])]; tensor v_cache_end_0 = const()[name = string("v_cache_end_0"), val = tensor([6, 1, 1500, 512])]; tensor v_cache_end_mask_0 = const()[name = string("v_cache_end_mask_0"), val = tensor([false, true, true, true])]; tensor v_cache_squeeze_mask_0 = const()[name = string("v_cache_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache_cast_fp16 = slice_by_index(begin = v_cache_begin_0, end = v_cache_end_0, end_mask = v_cache_end_mask_0, squeeze_mask = v_cache_squeeze_mask_0, x = read_state_3)[name = string("v_cache_cast_fp16")]; int32 var_1131 = const()[name = string("op_1131"), val = int32(-1)]; tensor var_1149_axes_0 = const()[name = string("op_1149_axes_0"), val = tensor([-1])]; tensor blocks_5_attn_ln_weight_to_fp16 = const()[name = string("blocks_5_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92296128)))]; tensor blocks_5_attn_ln_bias_to_fp16 = const()[name = string("blocks_5_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92297216)))]; fp16 var_1137_to_fp16 = const()[name = string("op_1137_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_1149_cast_fp16 = layer_norm(axes = var_1149_axes_0, beta = blocks_5_attn_ln_bias_to_fp16, epsilon = var_1137_to_fp16, gamma = blocks_5_attn_ln_weight_to_fp16, x = x_93_cast_fp16)[name = string("op_1149_cast_fp16")]; tensor var_1160_to_fp16 = const()[name = string("op_1160_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92298304)))]; tensor var_1161_to_fp16 = const()[name = string("op_1161_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92822656)))]; tensor linear_40_cast_fp16 = linear(bias = var_1161_to_fp16, weight = var_1160_to_fp16, x = var_1149_cast_fp16)[name = string("linear_40_cast_fp16")]; tensor var_1164_to_fp16 = const()[name = string("op_1164_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92823744)))]; tensor linear_41_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1164_to_fp16, x = var_1149_cast_fp16)[name = string("linear_41_cast_fp16")]; tensor var_1168_to_fp16 = const()[name = string("op_1168_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93348096)))]; tensor var_1169_to_fp16 = const()[name = string("op_1169_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93872448)))]; tensor linear_42_cast_fp16 = linear(bias = var_1169_to_fp16, weight = var_1168_to_fp16, x = var_1149_cast_fp16)[name = string("linear_42_cast_fp16")]; tensor var_1171_shape_cast_fp16 = shape(x = linear_40_cast_fp16)[name = string("op_1171_shape_cast_fp16")]; int32 gather_62_axis_0 = const()[name = string("gather_62_axis_0"), val = int32(0)]; int32 gather_62_batch_dims_0 = const()[name = string("gather_62_batch_dims_0"), val = int32(0)]; bool gather_62_validate_indices_0 = const()[name = string("gather_62_validate_indices_0"), val = bool(false)]; string var_1171_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1171_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_62_to_uint16 = const()[name = string("select_62_to_uint16"), val = uint16(1)]; tensor var_1171_shape_cast_fp16_to_uint16 = cast(dtype = var_1171_shape_cast_fp16_to_uint16_dtype_0, x = var_1171_shape_cast_fp16)[name = string("cast_68")]; uint16 gather_62_cast_uint16 = gather(axis = gather_62_axis_0, batch_dims = gather_62_batch_dims_0, indices = select_62_to_uint16, validate_indices = gather_62_validate_indices_0, x = var_1171_shape_cast_fp16_to_uint16)[name = string("gather_62_cast_uint16")]; string gather_62_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_62_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_62_cast_uint16_to_int32 = cast(dtype = gather_62_cast_uint16_to_int32_dtype_0, x = gather_62_cast_uint16)[name = string("cast_67")]; int32 end_step = add(x = offset, y = gather_62_cast_uint16_to_int32)[name = string("end_step")]; tensor expand_dims_80 = const()[name = string("expand_dims_80"), val = tensor([0])]; tensor expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor([0])]; tensor expand_dims_83_axes_0 = const()[name = string("expand_dims_83_axes_0"), val = tensor([0])]; tensor expand_dims_83 = expand_dims(axes = expand_dims_83_axes_0, x = end_step)[name = string("expand_dims_83")]; tensor concat_114_values0_0 = const()[name = string("concat_114_values0_0"), val = tensor([5])]; int32 concat_114_axis_0 = const()[name = string("concat_114_axis_0"), val = int32(0)]; bool concat_114_interleave_0 = const()[name = string("concat_114_interleave_0"), val = bool(false)]; tensor concat_114 = concat(axis = concat_114_axis_0, interleave = concat_114_interleave_0, values = (concat_114_values0_0, expand_dims_80, expand_dims_1, expand_dims_82))[name = string("concat_114")]; tensor concat_115_values0_0 = const()[name = string("concat_115_values0_0"), val = tensor([0])]; tensor concat_115_values1_0 = const()[name = string("concat_115_values1_0"), val = tensor([0])]; tensor concat_115_values3_0 = const()[name = string("concat_115_values3_0"), val = tensor([0])]; int32 concat_115_axis_0 = const()[name = string("concat_115_axis_0"), val = int32(0)]; bool concat_115_interleave_0 = const()[name = string("concat_115_interleave_0"), val = bool(false)]; tensor concat_115 = concat(axis = concat_115_axis_0, interleave = concat_115_interleave_0, values = (concat_115_values0_0, concat_115_values1_0, expand_dims_83, concat_115_values3_0))[name = string("concat_115")]; tensor k_cache1_internal_tensor_assign_6_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; tensor k_cache1_internal_tensor_assign_6_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; tensor k_cache1_internal_tensor_assign_6_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; tensor k_cache1_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor k_cache1_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_114, begin_mask = k_cache1_internal_tensor_assign_6_begin_mask_0, end = concat_115, end_mask = k_cache1_internal_tensor_assign_6_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_6_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_6_stride_0, update = linear_41_cast_fp16, x = coreml_update_state_20)[name = string("k_cache1_internal_tensor_assign_6_cast_fp16")]; write_state(data = k_cache1_internal_tensor_assign_6_cast_fp16, input = k_cache1)[name = string("coreml_update_state_22_write_state")]; tensor v_cache1_internal_tensor_assign_6_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; tensor v_cache1_internal_tensor_assign_6_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; tensor v_cache1_internal_tensor_assign_6_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; tensor v_cache1_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([true, false, false, false])]; tensor v_cache1_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_114, begin_mask = v_cache1_internal_tensor_assign_6_begin_mask_0, end = concat_115, end_mask = v_cache1_internal_tensor_assign_6_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_6_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_6_stride_0, update = linear_42_cast_fp16, x = coreml_update_state_21)[name = string("v_cache1_internal_tensor_assign_6_cast_fp16")]; write_state(data = v_cache1_internal_tensor_assign_6_cast_fp16, input = v_cache1)[name = string("coreml_update_state_23_write_state")]; int32 concat_120_values0_0 = const()[name = string("concat_120_values0_0"), val = int32(1)]; int32 concat_120_values2_0 = const()[name = string("concat_120_values2_0"), val = int32(512)]; int32 concat_120_axis_0 = const()[name = string("concat_120_axis_0"), val = int32(0)]; bool concat_120_interleave_0 = const()[name = string("concat_120_interleave_0"), val = bool(false)]; tensor concat_120 = concat(axis = concat_120_axis_0, interleave = concat_120_interleave_0, values = (concat_120_values0_0, end_step, concat_120_values2_0))[name = string("concat_120")]; tensor var_1187_begin_0 = const()[name = string("op_1187_begin_0"), val = tensor([0, 0, 0])]; tensor var_1187_end_mask_0 = const()[name = string("op_1187_end_mask_0"), val = tensor([true, false, true])]; tensor var_1187_cast_fp16 = slice_by_index(begin = var_1187_begin_0, end = concat_120, end_mask = var_1187_end_mask_0, x = k_cache_21_cast_fp16)[name = string("op_1187_cast_fp16")]; tensor var_1190_begin_0 = const()[name = string("op_1190_begin_0"), val = tensor([0, 0, 0])]; tensor var_1190_end_mask_0 = const()[name = string("op_1190_end_mask_0"), val = tensor([true, false, true])]; tensor var_1190_cast_fp16 = slice_by_index(begin = var_1190_begin_0, end = concat_120, end_mask = var_1190_end_mask_0, x = v_cache_21_cast_fp16)[name = string("op_1190_cast_fp16")]; tensor concat_122x = const()[name = string("concat_122x"), val = tensor([1, -1, 8, 64])]; tensor var_1200_cast_fp16 = reshape(shape = concat_122x, x = linear_40_cast_fp16)[name = string("op_1200_cast_fp16")]; tensor const_50_to_fp16 = const()[name = string("const_50_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_43_cast_fp16 = mul(x = var_1200_cast_fp16, y = const_50_to_fp16)[name = string("q_43_cast_fp16")]; tensor concat_123x = const()[name = string("concat_123x"), val = tensor([1, -1, 8, 64])]; tensor var_1207_cast_fp16 = reshape(shape = concat_123x, x = var_1187_cast_fp16)[name = string("op_1207_cast_fp16")]; tensor const_51_to_fp16 = const()[name = string("const_51_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_55_cast_fp16 = mul(x = var_1207_cast_fp16, y = const_51_to_fp16)[name = string("k_55_cast_fp16")]; tensor concat_124x = const()[name = string("concat_124x"), val = tensor([1, -1, 8, 64])]; tensor var_1214_cast_fp16 = reshape(shape = concat_124x, x = var_1190_cast_fp16)[name = string("op_1214_cast_fp16")]; tensor var_1215 = const()[name = string("op_1215"), val = tensor([0, 2, 1, 3])]; bool qk_31_transpose_x_0 = const()[name = string("qk_31_transpose_x_0"), val = bool(false)]; bool qk_31_transpose_y_0 = const()[name = string("qk_31_transpose_y_0"), val = bool(false)]; tensor transpose_69_perm_0 = const()[name = string("transpose_69_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_70_perm_0 = const()[name = string("transpose_70_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_70 = transpose(perm = transpose_70_perm_0, x = k_55_cast_fp16)[name = string("transpose_78")]; tensor transpose_69 = transpose(perm = transpose_69_perm_0, x = q_43_cast_fp16)[name = string("transpose_79")]; tensor qk_31_cast_fp16 = matmul(transpose_x = qk_31_transpose_x_0, transpose_y = qk_31_transpose_y_0, x = transpose_69, y = transpose_70)[name = string("qk_31_cast_fp16")]; int32 concat_125_values1_0 = const()[name = string("concat_125_values1_0"), val = int32(448)]; int32 concat_125_axis_0 = const()[name = string("concat_125_axis_0"), val = int32(0)]; bool concat_125_interleave_0 = const()[name = string("concat_125_interleave_0"), val = bool(false)]; tensor concat_125 = concat(axis = concat_125_axis_0, interleave = concat_125_interleave_0, values = (gather_62_cast_uint16_to_int32, concat_125_values1_0))[name = string("concat_125")]; tensor var_1218_begin_0 = const()[name = string("op_1218_begin_0"), val = tensor([0, 0])]; tensor var_1218_end_mask_0 = const()[name = string("op_1218_end_mask_0"), val = tensor([false, true])]; tensor var_1218_cast_fp16 = slice_by_index(begin = var_1218_begin_0, end = concat_125, end_mask = var_1218_end_mask_0, x = mask_to_fp16)[name = string("op_1218_cast_fp16")]; int32 concat_126_values0_0 = const()[name = string("concat_126_values0_0"), val = int32(0)]; int32 concat_126_axis_0 = const()[name = string("concat_126_axis_0"), val = int32(0)]; bool concat_126_interleave_0 = const()[name = string("concat_126_interleave_0"), val = bool(false)]; tensor concat_126 = concat(axis = concat_126_axis_0, interleave = concat_126_interleave_0, values = (concat_126_values0_0, gather_62_cast_uint16_to_int32))[name = string("concat_126")]; tensor var_1219_begin_0 = const()[name = string("op_1219_begin_0"), val = tensor([0, 0])]; tensor var_1219_end_mask_0 = const()[name = string("op_1219_end_mask_0"), val = tensor([true, false])]; tensor var_1219_cast_fp16 = slice_by_index(begin = var_1219_begin_0, end = concat_126, end_mask = var_1219_end_mask_0, x = var_1218_cast_fp16)[name = string("op_1219_cast_fp16")]; tensor qk_33_cast_fp16 = add(x = qk_31_cast_fp16, y = var_1219_cast_fp16)[name = string("qk_33_cast_fp16")]; tensor var_1222_cast_fp16 = softmax(axis = var_1131, x = qk_33_cast_fp16)[name = string("op_1222_cast_fp16")]; bool var_1224_transpose_x_0 = const()[name = string("op_1224_transpose_x_0"), val = bool(false)]; bool var_1224_transpose_y_0 = const()[name = string("op_1224_transpose_y_0"), val = bool(false)]; tensor v_55_cast_fp16 = transpose(perm = var_1215, x = var_1214_cast_fp16)[name = string("transpose_80")]; tensor var_1224_cast_fp16 = matmul(transpose_x = var_1224_transpose_x_0, transpose_y = var_1224_transpose_y_0, x = var_1222_cast_fp16, y = v_55_cast_fp16)[name = string("op_1224_cast_fp16")]; tensor var_1225 = const()[name = string("op_1225"), val = tensor([0, 2, 1, 3])]; tensor concat_127x = const()[name = string("concat_127x"), val = tensor([1, -1, 512])]; tensor var_1226_cast_fp16 = transpose(perm = var_1225, x = var_1224_cast_fp16)[name = string("transpose_77")]; tensor x_97_cast_fp16 = reshape(shape = concat_127x, x = var_1226_cast_fp16)[name = string("x_97_cast_fp16")]; tensor var_1230_to_fp16 = const()[name = string("op_1230_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93873536)))]; tensor var_1231_to_fp16 = const()[name = string("op_1231_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94397888)))]; tensor linear_43_cast_fp16 = linear(bias = var_1231_to_fp16, weight = var_1230_to_fp16, x = x_97_cast_fp16)[name = string("linear_43_cast_fp16")]; tensor x_99_cast_fp16 = add(x = x_93_cast_fp16, y = linear_43_cast_fp16)[name = string("x_99_cast_fp16")]; tensor var_1238_axes_0 = const()[name = string("op_1238_axes_0"), val = tensor([-1])]; tensor blocks_5_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_5_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94398976)))]; tensor blocks_5_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_5_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94400064)))]; tensor var_1238_cast_fp16 = layer_norm(axes = var_1238_axes_0, beta = blocks_5_cross_attn_ln_bias_to_fp16, epsilon = var_1137_to_fp16, gamma = blocks_5_cross_attn_ln_weight_to_fp16, x = x_99_cast_fp16)[name = string("op_1238_cast_fp16")]; tensor var_1247_to_fp16 = const()[name = string("op_1247_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94401152)))]; tensor var_1248_to_fp16 = const()[name = string("op_1248_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94925504)))]; tensor linear_44_cast_fp16 = linear(bias = var_1248_to_fp16, weight = var_1247_to_fp16, x = var_1238_cast_fp16)[name = string("linear_44_cast_fp16")]; tensor concat_128 = const()[name = string("concat_128"), val = tensor([0, 0, 0])]; tensor concat_129 = const()[name = string("concat_129"), val = tensor([0, 1500, 0])]; tensor k_57_internal_tensor_assign_1_stride_0 = const()[name = string("k_57_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor k_57_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_57_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor k_57_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_57_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor k_57_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_57_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor k_57_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_128, begin_mask = k_57_internal_tensor_assign_1_begin_mask_0, end = concat_129, end_mask = k_57_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_57_internal_tensor_assign_1_squeeze_mask_0, stride = k_57_internal_tensor_assign_1_stride_0, update = k_cache_cast_fp16, x = k_7_to_fp16)[name = string("k_57_internal_tensor_assign_1_cast_fp16")]; tensor concat_130 = const()[name = string("concat_130"), val = tensor([0, 0, 0])]; tensor concat_131 = const()[name = string("concat_131"), val = tensor([0, 1500, 0])]; tensor v_57_internal_tensor_assign_1_stride_0 = const()[name = string("v_57_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; tensor v_57_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_57_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; tensor v_57_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_57_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; tensor v_57_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_57_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; tensor v_57_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_130, begin_mask = v_57_internal_tensor_assign_1_begin_mask_0, end = concat_131, end_mask = v_57_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_57_internal_tensor_assign_1_squeeze_mask_0, stride = v_57_internal_tensor_assign_1_stride_0, update = v_cache_cast_fp16, x = k_7_to_fp16)[name = string("v_57_internal_tensor_assign_1_cast_fp16")]; tensor concat_132x = const()[name = string("concat_132x"), val = tensor([1, -1, 8, 64])]; tensor var_1268_cast_fp16 = reshape(shape = concat_132x, x = linear_44_cast_fp16)[name = string("op_1268_cast_fp16")]; tensor const_52_to_fp16 = const()[name = string("const_52_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor q_cast_fp16 = mul(x = var_1268_cast_fp16, y = const_52_to_fp16)[name = string("q_cast_fp16")]; tensor var_1274 = const()[name = string("op_1274"), val = tensor([1, 1500, 8, -1])]; tensor var_1275_cast_fp16 = reshape(shape = var_1274, x = k_57_internal_tensor_assign_1_cast_fp16)[name = string("op_1275_cast_fp16")]; tensor const_53_to_fp16 = const()[name = string("const_53_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; tensor k_cast_fp16 = mul(x = var_1275_cast_fp16, y = const_53_to_fp16)[name = string("k_cast_fp16")]; tensor var_1281 = const()[name = string("op_1281"), val = tensor([1, 1500, 8, -1])]; tensor var_1282_cast_fp16 = reshape(shape = var_1281, x = v_57_internal_tensor_assign_1_cast_fp16)[name = string("op_1282_cast_fp16")]; tensor var_1283 = const()[name = string("op_1283"), val = tensor([0, 2, 1, 3])]; bool qk_transpose_x_0 = const()[name = string("qk_transpose_x_0"), val = bool(false)]; bool qk_transpose_y_0 = const()[name = string("qk_transpose_y_0"), val = bool(false)]; tensor transpose_71_perm_0 = const()[name = string("transpose_71_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_72_perm_0 = const()[name = string("transpose_72_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_72 = transpose(perm = transpose_72_perm_0, x = k_cast_fp16)[name = string("transpose_74")]; tensor transpose_71 = transpose(perm = transpose_71_perm_0, x = q_cast_fp16)[name = string("transpose_75")]; tensor qk_cast_fp16 = matmul(transpose_x = qk_transpose_x_0, transpose_y = qk_transpose_y_0, x = transpose_71, y = transpose_72)[name = string("qk_cast_fp16")]; tensor var_1287_cast_fp16 = softmax(axis = var_1131, x = qk_cast_fp16)[name = string("op_1287_cast_fp16")]; bool var_1289_transpose_x_0 = const()[name = string("op_1289_transpose_x_0"), val = bool(false)]; bool var_1289_transpose_y_0 = const()[name = string("op_1289_transpose_y_0"), val = bool(false)]; tensor v_cast_fp16 = transpose(perm = var_1283, x = var_1282_cast_fp16)[name = string("transpose_76")]; tensor var_1289_cast_fp16 = matmul(transpose_x = var_1289_transpose_x_0, transpose_y = var_1289_transpose_y_0, x = var_1287_cast_fp16, y = v_cast_fp16)[name = string("op_1289_cast_fp16")]; tensor var_1290 = const()[name = string("op_1290"), val = tensor([0, 2, 1, 3])]; tensor concat_133x = const()[name = string("concat_133x"), val = tensor([1, -1, 512])]; tensor var_1291_cast_fp16 = transpose(perm = var_1290, x = var_1289_cast_fp16)[name = string("transpose_73")]; tensor x_103_cast_fp16 = reshape(shape = concat_133x, x = var_1291_cast_fp16)[name = string("x_103_cast_fp16")]; tensor var_1295_to_fp16 = const()[name = string("op_1295_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94926592)))]; tensor var_1296_to_fp16 = const()[name = string("op_1296_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95450944)))]; tensor linear_45_cast_fp16 = linear(bias = var_1296_to_fp16, weight = var_1295_to_fp16, x = x_103_cast_fp16)[name = string("linear_45_cast_fp16")]; tensor x_105_cast_fp16 = add(x = x_99_cast_fp16, y = linear_45_cast_fp16)[name = string("x_105_cast_fp16")]; tensor var_1303_axes_0 = const()[name = string("op_1303_axes_0"), val = tensor([-1])]; tensor blocks_5_mlp_ln_weight_to_fp16 = const()[name = string("blocks_5_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95452032)))]; tensor blocks_5_mlp_ln_bias_to_fp16 = const()[name = string("blocks_5_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95453120)))]; tensor var_1303_cast_fp16 = layer_norm(axes = var_1303_axes_0, beta = blocks_5_mlp_ln_bias_to_fp16, epsilon = var_1137_to_fp16, gamma = blocks_5_mlp_ln_weight_to_fp16, x = x_105_cast_fp16)[name = string("op_1303_cast_fp16")]; tensor var_1312_to_fp16 = const()[name = string("op_1312_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95454208)))]; tensor var_1313_to_fp16 = const()[name = string("op_1313_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97551424)))]; tensor linear_46_cast_fp16 = linear(bias = var_1313_to_fp16, weight = var_1312_to_fp16, x = var_1303_cast_fp16)[name = string("linear_46_cast_fp16")]; string x_109_mode_0 = const()[name = string("x_109_mode_0"), val = string("EXACT")]; tensor x_109_cast_fp16 = gelu(mode = x_109_mode_0, x = linear_46_cast_fp16)[name = string("x_109_cast_fp16")]; tensor var_1318_to_fp16 = const()[name = string("op_1318_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97555584)))]; tensor var_1319_to_fp16 = const()[name = string("op_1319_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99652800)))]; tensor linear_47_cast_fp16 = linear(bias = var_1319_to_fp16, weight = var_1318_to_fp16, x = x_109_cast_fp16)[name = string("linear_47_cast_fp16")]; tensor x_111_cast_fp16 = add(x = x_105_cast_fp16, y = linear_47_cast_fp16)[name = string("x_111_cast_fp16")]; tensor var_1332_axes_0 = const()[name = string("op_1332_axes_0"), val = tensor([-1])]; tensor ln_weight_to_fp16 = const()[name = string("ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99653888)))]; tensor ln_bias_to_fp16 = const()[name = string("ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99654976)))]; fp16 var_1323_to_fp16 = const()[name = string("op_1323_to_fp16"), val = fp16(0x1.5p-17)]; tensor var_1332_cast_fp16 = layer_norm(axes = var_1332_axes_0, beta = ln_bias_to_fp16, epsilon = var_1323_to_fp16, gamma = ln_weight_to_fp16, x = x_111_cast_fp16)[name = string("op_1332_cast_fp16")]; tensor var_1342_bias_0_to_fp16 = const()[name = string("op_1342_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99656064)))]; tensor logits = linear(bias = var_1342_bias_0_to_fp16, weight = token_embedding_weight_to_fp16, x = var_1332_cast_fp16)[name = string("op_1342_cast_fp16")]; } -> (logits); }