lithium0003's picture
initial commit
ca32d55
program(1.3)
[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}, {"coremltools-component-torch", "2.4.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0"}})]
{
func main<ios18>(state<tensor<fp16, [6, 1, 448, 512]>> k_cache1, state<tensor<fp16, [6, 1, 1500, 512]>> k_cache2, tensor<fp16, [1, ?]> offset_mask, tensor<int32, [1, ?]> token_data, state<tensor<fp16, [6, 1, 448, 512]>> v_cache1, state<tensor<fp16, [6, 1, 1500, 512]>> v_cache2) [FlexibleShapeInformation = tuple<tuple<string, dict<string, tensor<int32, [?]>>>, tuple<string, dict<string, list<tensor<int32, [2]>, ?>>>>((("DefaultShapes", {{"offset_mask", [1, 1]}, {"token_data", [1, 1]}}), ("RangeDims", {{"offset_mask", [[1, 1], [1, 448]]}, {"token_data", [[1, 1], [1, 448]]}})))] {
tensor<int32, [2]> var_26_shape_cast_fp16 = shape(x = offset_mask)[name = string("op_26_shape_cast_fp16")];
int32 gather_0_axis_0 = const()[name = string("gather_0_axis_0"), val = int32(0)];
int32 gather_0_batch_dims_0 = const()[name = string("gather_0_batch_dims_0"), val = int32(0)];
bool gather_0_validate_indices_0 = const()[name = string("gather_0_validate_indices_0"), val = bool(false)];
string var_26_shape_cast_fp16_to_int16_dtype_0 = const()[name = string("op_26_shape_cast_fp16_to_int16_dtype_0"), val = string("int16")];
uint16 select_0_to_uint16 = const()[name = string("select_0_to_uint16"), val = uint16(1)];
tensor<int16, [2]> var_26_shape_cast_fp16_to_int16 = cast(dtype = var_26_shape_cast_fp16_to_int16_dtype_0, x = var_26_shape_cast_fp16)[name = string("cast_82")];
int16 gather_0_cast_uint16 = gather(axis = gather_0_axis_0, batch_dims = gather_0_batch_dims_0, indices = select_0_to_uint16, validate_indices = gather_0_validate_indices_0, x = var_26_shape_cast_fp16_to_int16)[name = string("gather_0_cast_uint16")];
string gather_0_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_0_cast_uint16_to_int32_dtype_0"), val = string("int32")];
tensor<int32, [2]> var_30_shape = shape(x = token_data)[name = string("op_30_shape")];
int32 gather_1_axis_0 = const()[name = string("gather_1_axis_0"), val = int32(0)];
int32 gather_1_batch_dims_0 = const()[name = string("gather_1_batch_dims_0"), val = int32(0)];
bool gather_1_validate_indices_0 = const()[name = string("gather_1_validate_indices_0"), val = bool(false)];
string var_30_shape_to_uint16_dtype_0 = const()[name = string("op_30_shape_to_uint16_dtype_0"), val = string("uint16")];
uint16 select_1_to_uint16 = const()[name = string("select_1_to_uint16"), val = uint16(1)];
tensor<uint16, [2]> var_30_shape_to_uint16 = cast(dtype = var_30_shape_to_uint16_dtype_0, x = var_30_shape)[name = string("cast_80")];
uint16 gather_1_cast_uint16 = gather(axis = gather_1_axis_0, batch_dims = gather_1_batch_dims_0, indices = select_1_to_uint16, validate_indices = gather_1_validate_indices_0, x = var_30_shape_to_uint16)[name = string("gather_1_cast_uint16")];
string gather_1_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_1_cast_uint16_to_int32_dtype_0"), val = string("int32")];
int32 gather_1_cast_uint16_to_int32 = cast(dtype = gather_1_cast_uint16_to_int32_dtype_0, x = gather_1_cast_uint16)[name = string("cast_79")];
int32 gather_0_cast_uint16_to_int32 = cast(dtype = gather_0_cast_uint16_to_int32_dtype_0, x = gather_0_cast_uint16)[name = string("cast_81")];
int32 offset = sub(x = gather_0_cast_uint16_to_int32, y = gather_1_cast_uint16_to_int32)[name = string("offset")];
int32 var_50_axis_0 = const()[name = string("op_50_axis_0"), val = int32(0)];
int32 var_50_batch_dims_0 = const()[name = string("op_50_batch_dims_0"), val = int32(0)];
bool var_50_validate_indices_0 = const()[name = string("op_50_validate_indices_0"), val = bool(false)];
tensor<fp16, [51865, 512]> token_embedding_weight_to_fp16 = const()[name = string("token_embedding_weight_to_fp16"), val = tensor<fp16, [51865, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
tensor<fp16, [1, ?, 512]> var_50_cast_fp16 = gather(axis = var_50_axis_0, batch_dims = var_50_batch_dims_0, indices = token_data, validate_indices = var_50_validate_indices_0, x = token_embedding_weight_to_fp16)[name = string("op_50_cast_fp16")];
int32 concat_0_values1_0 = const()[name = string("concat_0_values1_0"), val = int32(0)];
int32 concat_0_axis_0 = const()[name = string("concat_0_axis_0"), val = int32(0)];
bool concat_0_interleave_0 = const()[name = string("concat_0_interleave_0"), val = bool(false)];
tensor<int32, [2]> concat_0 = concat(axis = concat_0_axis_0, interleave = concat_0_interleave_0, values = (offset, concat_0_values1_0))[name = string("concat_0")];
int32 concat_1_values1_0 = const()[name = string("concat_1_values1_0"), val = int32(512)];
int32 concat_1_axis_0 = const()[name = string("concat_1_axis_0"), val = int32(0)];
bool concat_1_interleave_0 = const()[name = string("concat_1_interleave_0"), val = bool(false)];
tensor<int32, [2]> concat_1 = concat(axis = concat_1_axis_0, interleave = concat_1_interleave_0, values = (gather_0_cast_uint16_to_int32, concat_1_values1_0))[name = string("concat_1")];
tensor<bool, [2]> var_53_end_mask_0 = const()[name = string("op_53_end_mask_0"), val = tensor<bool, [2]>([false, true])];
tensor<fp16, [448, 512]> positional_embedding_to_fp16 = const()[name = string("positional_embedding_to_fp16"), val = tensor<fp16, [448, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53109888)))];
tensor<fp16, [?, ?]> var_53_cast_fp16 = slice_by_index(begin = concat_0, end = concat_1, end_mask = var_53_end_mask_0, x = positional_embedding_to_fp16)[name = string("op_53_cast_fp16")];
tensor<fp16, [1, ?, 512]> x_3_cast_fp16 = add(x = var_50_cast_fp16, y = var_53_cast_fp16)[name = string("x_3_cast_fp16")];
tensor<fp16, [6, 1, 448, 512]> read_state_0 = read_state(input = k_cache1)[name = string("read_state_0")];
tensor<int32, [4]> k_cache_1_begin_0 = const()[name = string("k_cache_1_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> k_cache_1_end_0 = const()[name = string("k_cache_1_end_0"), val = tensor<int32, [4]>([1, 1, 448, 512])];
tensor<bool, [4]> k_cache_1_end_mask_0 = const()[name = string("k_cache_1_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<bool, [4]> k_cache_1_squeeze_mask_0 = const()[name = string("k_cache_1_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
tensor<fp16, [1, 448, 512]> k_cache_1_cast_fp16 = slice_by_index(begin = k_cache_1_begin_0, end = k_cache_1_end_0, end_mask = k_cache_1_end_mask_0, squeeze_mask = k_cache_1_squeeze_mask_0, x = read_state_0)[name = string("k_cache_1_cast_fp16")];
tensor<fp16, [6, 1, 448, 512]> read_state_1 = read_state(input = v_cache1)[name = string("read_state_1")];
tensor<int32, [4]> v_cache_1_begin_0 = const()[name = string("v_cache_1_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> v_cache_1_end_0 = const()[name = string("v_cache_1_end_0"), val = tensor<int32, [4]>([1, 1, 448, 512])];
tensor<bool, [4]> v_cache_1_end_mask_0 = const()[name = string("v_cache_1_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<bool, [4]> v_cache_1_squeeze_mask_0 = const()[name = string("v_cache_1_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
tensor<fp16, [1, 448, 512]> v_cache_1_cast_fp16 = slice_by_index(begin = v_cache_1_begin_0, end = v_cache_1_end_0, end_mask = v_cache_1_end_mask_0, squeeze_mask = v_cache_1_squeeze_mask_0, x = read_state_1)[name = string("v_cache_1_cast_fp16")];
tensor<fp16, [6, 1, 1500, 512]> read_state_2 = read_state(input = k_cache2)[name = string("read_state_2")];
tensor<int32, [4]> k_cache_3_begin_0 = const()[name = string("k_cache_3_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> k_cache_3_end_0 = const()[name = string("k_cache_3_end_0"), val = tensor<int32, [4]>([1, 1, 1500, 512])];
tensor<bool, [4]> k_cache_3_end_mask_0 = const()[name = string("k_cache_3_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<bool, [4]> k_cache_3_squeeze_mask_0 = const()[name = string("k_cache_3_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
tensor<fp16, [1, 1500, 512]> k_cache_3_cast_fp16 = slice_by_index(begin = k_cache_3_begin_0, end = k_cache_3_end_0, end_mask = k_cache_3_end_mask_0, squeeze_mask = k_cache_3_squeeze_mask_0, x = read_state_2)[name = string("k_cache_3_cast_fp16")];
tensor<fp16, [6, 1, 1500, 512]> read_state_3 = read_state(input = v_cache2)[name = string("read_state_3")];
tensor<int32, [4]> v_cache_3_begin_0 = const()[name = string("v_cache_3_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> v_cache_3_end_0 = const()[name = string("v_cache_3_end_0"), val = tensor<int32, [4]>([1, 1, 1500, 512])];
tensor<bool, [4]> v_cache_3_end_mask_0 = const()[name = string("v_cache_3_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<bool, [4]> v_cache_3_squeeze_mask_0 = const()[name = string("v_cache_3_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
tensor<fp16, [1, 1500, 512]> v_cache_3_cast_fp16 = slice_by_index(begin = v_cache_3_begin_0, end = v_cache_3_end_0, end_mask = v_cache_3_end_mask_0, squeeze_mask = v_cache_3_squeeze_mask_0, x = read_state_3)[name = string("v_cache_3_cast_fp16")];
int32 var_76 = const()[name = string("op_76"), val = int32(-1)];
tensor<int32, [1]> var_94_axes_0 = const()[name = string("op_94_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> blocks_0_attn_ln_weight_to_fp16 = const()[name = string("blocks_0_attn_ln_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53568704)))];
tensor<fp16, [512]> blocks_0_attn_ln_bias_to_fp16 = const()[name = string("blocks_0_attn_ln_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53569792)))];
fp16 var_82_to_fp16 = const()[name = string("op_82_to_fp16"), val = fp16(0x1.5p-17)];
tensor<fp16, [1, ?, 512]> var_94_cast_fp16 = layer_norm(axes = var_94_axes_0, beta = blocks_0_attn_ln_bias_to_fp16, epsilon = var_82_to_fp16, gamma = blocks_0_attn_ln_weight_to_fp16, x = x_3_cast_fp16)[name = string("op_94_cast_fp16")];
tensor<fp16, [512, 512]> var_105_to_fp16 = const()[name = string("op_105_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53570880)))];
tensor<fp16, [512]> var_106_to_fp16 = const()[name = string("op_106_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54095232)))];
tensor<fp16, [1, ?, 512]> linear_0_cast_fp16 = linear(bias = var_106_to_fp16, weight = var_105_to_fp16, x = var_94_cast_fp16)[name = string("linear_0_cast_fp16")];
tensor<fp16, [512, 512]> var_109_to_fp16 = const()[name = string("op_109_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54096320)))];
tensor<fp16, [512]> linear_1_bias_0_to_fp16 = const()[name = string("linear_1_bias_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54620672)))];
tensor<fp16, [1, ?, 512]> linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_109_to_fp16, x = var_94_cast_fp16)[name = string("linear_1_cast_fp16")];
tensor<fp16, [512, 512]> var_113_to_fp16 = const()[name = string("op_113_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54621760)))];
tensor<fp16, [512]> var_114_to_fp16 = const()[name = string("op_114_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55146112)))];
tensor<fp16, [1, ?, 512]> linear_2_cast_fp16 = linear(bias = var_114_to_fp16, weight = var_113_to_fp16, x = var_94_cast_fp16)[name = string("linear_2_cast_fp16")];
tensor<int32, [3]> var_116_shape_cast_fp16 = shape(x = linear_0_cast_fp16)[name = string("op_116_shape_cast_fp16")];
int32 gather_2_axis_0 = const()[name = string("gather_2_axis_0"), val = int32(0)];
int32 gather_2_batch_dims_0 = const()[name = string("gather_2_batch_dims_0"), val = int32(0)];
bool gather_2_validate_indices_0 = const()[name = string("gather_2_validate_indices_0"), val = bool(false)];
string var_116_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_116_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
uint16 select_2_to_uint16 = const()[name = string("select_2_to_uint16"), val = uint16(1)];
tensor<uint16, [3]> var_116_shape_cast_fp16_to_uint16 = cast(dtype = var_116_shape_cast_fp16_to_uint16_dtype_0, x = var_116_shape_cast_fp16)[name = string("cast_78")];
uint16 gather_2_cast_uint16 = gather(axis = gather_2_axis_0, batch_dims = gather_2_batch_dims_0, indices = select_2_to_uint16, validate_indices = gather_2_validate_indices_0, x = var_116_shape_cast_fp16_to_uint16)[name = string("gather_2_cast_uint16")];
string gather_2_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_2_cast_uint16_to_int32_dtype_0"), val = string("int32")];
int32 gather_2_cast_uint16_to_int32 = cast(dtype = gather_2_cast_uint16_to_int32_dtype_0, x = gather_2_cast_uint16)[name = string("cast_77")];
int32 end_step_3 = add(x = offset, y = gather_2_cast_uint16_to_int32)[name = string("end_step_3")];
tensor<int32, [1]> expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_1_axes_0 = const()[name = string("expand_dims_1_axes_0"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_1 = expand_dims(axes = expand_dims_1_axes_0, x = offset)[name = string("expand_dims_1")];
tensor<int32, [1]> expand_dims_2 = const()[name = string("expand_dims_2"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_3_axes_0 = const()[name = string("expand_dims_3_axes_0"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_3 = expand_dims(axes = expand_dims_3_axes_0, x = end_step_3)[name = string("expand_dims_3")];
tensor<int32, [1]> concat_4_values0_0 = const()[name = string("concat_4_values0_0"), val = tensor<int32, [1]>([0])];
int32 concat_4_axis_0 = const()[name = string("concat_4_axis_0"), val = int32(0)];
bool concat_4_interleave_0 = const()[name = string("concat_4_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_4 = concat(axis = concat_4_axis_0, interleave = concat_4_interleave_0, values = (concat_4_values0_0, expand_dims_0, expand_dims_1, expand_dims_2))[name = string("concat_4")];
tensor<int32, [1]> concat_5_values0_0 = const()[name = string("concat_5_values0_0"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> concat_5_values1_0 = const()[name = string("concat_5_values1_0"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> concat_5_values3_0 = const()[name = string("concat_5_values3_0"), val = tensor<int32, [1]>([0])];
int32 concat_5_axis_0 = const()[name = string("concat_5_axis_0"), val = int32(0)];
bool concat_5_interleave_0 = const()[name = string("concat_5_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_5 = concat(axis = concat_5_axis_0, interleave = concat_5_interleave_0, values = (concat_5_values0_0, concat_5_values1_0, expand_dims_3, concat_5_values3_0))[name = string("concat_5")];
tensor<int32, [4]> k_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_1_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
tensor<bool, [4]> k_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<bool, [4]> k_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
tensor<bool, [4]> k_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
tensor<fp16, [6, 1, 448, 512]> k_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_4, begin_mask = k_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_5, end_mask = k_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_1_stride_0, update = linear_1_cast_fp16, x = read_state_0)[name = string("k_cache1_internal_tensor_assign_1_cast_fp16")];
write_state(data = k_cache1_internal_tensor_assign_1_cast_fp16, input = k_cache1)[name = string("coreml_update_state_12_write_state")];
tensor<fp16, [6, 1, 448, 512]> coreml_update_state_12 = read_state(input = k_cache1)[name = string("coreml_update_state_12")];
tensor<int32, [4]> v_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_1_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
tensor<bool, [4]> v_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<bool, [4]> v_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
tensor<bool, [4]> v_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
tensor<fp16, [6, 1, 448, 512]> v_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_4, begin_mask = v_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_5, end_mask = v_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_1_stride_0, update = linear_2_cast_fp16, x = read_state_1)[name = string("v_cache1_internal_tensor_assign_1_cast_fp16")];
write_state(data = v_cache1_internal_tensor_assign_1_cast_fp16, input = v_cache1)[name = string("coreml_update_state_13_write_state")];
tensor<fp16, [6, 1, 448, 512]> coreml_update_state_13 = read_state(input = v_cache1)[name = string("coreml_update_state_13")];
int32 concat_10_values0_0 = const()[name = string("concat_10_values0_0"), val = int32(1)];
int32 concat_10_values2_0 = const()[name = string("concat_10_values2_0"), val = int32(512)];
int32 concat_10_axis_0 = const()[name = string("concat_10_axis_0"), val = int32(0)];
bool concat_10_interleave_0 = const()[name = string("concat_10_interleave_0"), val = bool(false)];
tensor<int32, [3]> concat_10 = concat(axis = concat_10_axis_0, interleave = concat_10_interleave_0, values = (concat_10_values0_0, end_step_3, concat_10_values2_0))[name = string("concat_10")];
tensor<int32, [3]> var_132_begin_0 = const()[name = string("op_132_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
tensor<bool, [3]> var_132_end_mask_0 = const()[name = string("op_132_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
tensor<fp16, [1, ?, 512]> var_132_cast_fp16 = slice_by_index(begin = var_132_begin_0, end = concat_10, end_mask = var_132_end_mask_0, x = k_cache_1_cast_fp16)[name = string("op_132_cast_fp16")];
tensor<int32, [3]> var_135_begin_0 = const()[name = string("op_135_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
tensor<bool, [3]> var_135_end_mask_0 = const()[name = string("op_135_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
tensor<fp16, [1, ?, 512]> var_135_cast_fp16 = slice_by_index(begin = var_135_begin_0, end = concat_10, end_mask = var_135_end_mask_0, x = v_cache_1_cast_fp16)[name = string("op_135_cast_fp16")];
tensor<int32, [4]> concat_12x = const()[name = string("concat_12x"), val = tensor<int32, [4]>([1, -1, 8, 64])];
tensor<fp16, [1, ?, 8, 64]> var_145_cast_fp16 = reshape(shape = concat_12x, x = linear_0_cast_fp16)[name = string("op_145_cast_fp16")];
tensor<fp16, [1, 1, 1, 1]> const_30_to_fp16 = const()[name = string("const_30_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
tensor<fp16, [1, ?, 8, 64]> q_3_cast_fp16 = mul(x = var_145_cast_fp16, y = const_30_to_fp16)[name = string("q_3_cast_fp16")];
tensor<int32, [4]> concat_13x = const()[name = string("concat_13x"), val = tensor<int32, [4]>([1, -1, 8, 64])];
tensor<fp16, [1, ?, 8, 64]> var_152_cast_fp16 = reshape(shape = concat_13x, x = var_132_cast_fp16)[name = string("op_152_cast_fp16")];
tensor<fp16, [1, 1, 1, 1]> const_31_to_fp16 = const()[name = string("const_31_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
tensor<fp16, [1, ?, 8, 64]> k_5_cast_fp16 = mul(x = var_152_cast_fp16, y = const_31_to_fp16)[name = string("k_5_cast_fp16")];
tensor<int32, [4]> concat_14x = const()[name = string("concat_14x"), val = tensor<int32, [4]>([1, -1, 8, 64])];
tensor<fp16, [1, ?, 8, 64]> var_159_cast_fp16 = reshape(shape = concat_14x, x = var_135_cast_fp16)[name = string("op_159_cast_fp16")];
tensor<int32, [4]> var_160 = const()[name = string("op_160"), val = tensor<int32, [4]>([0, 2, 1, 3])];
bool qk_1_transpose_x_0 = const()[name = string("qk_1_transpose_x_0"), val = bool(false)];
bool qk_1_transpose_y_0 = const()[name = string("qk_1_transpose_y_0"), val = bool(false)];
tensor<int32, [4]> transpose_49_perm_0 = const()[name = string("transpose_49_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_50_perm_0 = const()[name = string("transpose_50_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 8, 64, ?]> transpose_50 = transpose(perm = transpose_50_perm_0, x = k_5_cast_fp16)[name = string("transpose_118")];
tensor<fp16, [1, 8, ?, 64]> transpose_49 = transpose(perm = transpose_49_perm_0, x = q_3_cast_fp16)[name = string("transpose_119")];
tensor<fp16, [1, 8, ?, ?]> qk_1_cast_fp16 = matmul(transpose_x = qk_1_transpose_x_0, transpose_y = qk_1_transpose_y_0, x = transpose_49, y = transpose_50)[name = string("qk_1_cast_fp16")];
int32 concat_15_values1_0 = const()[name = string("concat_15_values1_0"), val = int32(448)];
int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)];
bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)];
tensor<int32, [2]> concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (gather_2_cast_uint16_to_int32, concat_15_values1_0))[name = string("concat_15")];
tensor<int32, [2]> var_163_begin_0 = const()[name = string("op_163_begin_0"), val = tensor<int32, [2]>([0, 0])];
tensor<bool, [2]> var_163_end_mask_0 = const()[name = string("op_163_end_mask_0"), val = tensor<bool, [2]>([false, true])];
tensor<fp16, [448, 448]> mask_to_fp16 = const()[name = string("mask_to_fp16"), val = tensor<fp16, [448, 448]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55147200)))];
tensor<fp16, [?, 448]> var_163_cast_fp16 = slice_by_index(begin = var_163_begin_0, end = concat_15, end_mask = var_163_end_mask_0, x = mask_to_fp16)[name = string("op_163_cast_fp16")];
int32 concat_16_values0_0 = const()[name = string("concat_16_values0_0"), val = int32(0)];
int32 concat_16_axis_0 = const()[name = string("concat_16_axis_0"), val = int32(0)];
bool concat_16_interleave_0 = const()[name = string("concat_16_interleave_0"), val = bool(false)];
tensor<int32, [2]> concat_16 = concat(axis = concat_16_axis_0, interleave = concat_16_interleave_0, values = (concat_16_values0_0, gather_2_cast_uint16_to_int32))[name = string("concat_16")];
tensor<int32, [2]> var_164_begin_0 = const()[name = string("op_164_begin_0"), val = tensor<int32, [2]>([0, 0])];
tensor<bool, [2]> var_164_end_mask_0 = const()[name = string("op_164_end_mask_0"), val = tensor<bool, [2]>([true, false])];
tensor<fp16, [?, ?]> var_164_cast_fp16 = slice_by_index(begin = var_164_begin_0, end = concat_16, end_mask = var_164_end_mask_0, x = var_163_cast_fp16)[name = string("op_164_cast_fp16")];
tensor<fp16, [1, 8, ?, ?]> qk_3_cast_fp16 = add(x = qk_1_cast_fp16, y = var_164_cast_fp16)[name = string("qk_3_cast_fp16")];
tensor<fp16, [1, 8, ?, ?]> var_167_cast_fp16 = softmax(axis = var_76, x = qk_3_cast_fp16)[name = string("op_167_cast_fp16")];
bool var_169_transpose_x_0 = const()[name = string("op_169_transpose_x_0"), val = bool(false)];
bool var_169_transpose_y_0 = const()[name = string("op_169_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 8, ?, 64]> v_5_cast_fp16 = transpose(perm = var_160, x = var_159_cast_fp16)[name = string("transpose_120")];
tensor<fp16, [1, 8, ?, 64]> var_169_cast_fp16 = matmul(transpose_x = var_169_transpose_x_0, transpose_y = var_169_transpose_y_0, x = var_167_cast_fp16, y = v_5_cast_fp16)[name = string("op_169_cast_fp16")];
tensor<int32, [4]> var_170 = const()[name = string("op_170"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> concat_17x = const()[name = string("concat_17x"), val = tensor<int32, [3]>([1, -1, 512])];
tensor<fp16, [1, ?, 8, 64]> var_171_cast_fp16 = transpose(perm = var_170, x = var_169_cast_fp16)[name = string("transpose_117")];
tensor<fp16, [1, ?, 512]> x_7_cast_fp16 = reshape(shape = concat_17x, x = var_171_cast_fp16)[name = string("x_7_cast_fp16")];
tensor<fp16, [512, 512]> var_175_to_fp16 = const()[name = string("op_175_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55548672)))];
tensor<fp16, [512]> var_176_to_fp16 = const()[name = string("op_176_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56073024)))];
tensor<fp16, [1, ?, 512]> linear_3_cast_fp16 = linear(bias = var_176_to_fp16, weight = var_175_to_fp16, x = x_7_cast_fp16)[name = string("linear_3_cast_fp16")];
tensor<fp16, [1, ?, 512]> x_9_cast_fp16 = add(x = x_3_cast_fp16, y = linear_3_cast_fp16)[name = string("x_9_cast_fp16")];
tensor<int32, [1]> var_183_axes_0 = const()[name = string("op_183_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> blocks_0_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_0_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56074112)))];
tensor<fp16, [512]> blocks_0_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_0_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56075200)))];
tensor<fp16, [1, ?, 512]> var_183_cast_fp16 = layer_norm(axes = var_183_axes_0, beta = blocks_0_cross_attn_ln_bias_to_fp16, epsilon = var_82_to_fp16, gamma = blocks_0_cross_attn_ln_weight_to_fp16, x = x_9_cast_fp16)[name = string("op_183_cast_fp16")];
tensor<fp16, [512, 512]> var_192_to_fp16 = const()[name = string("op_192_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56076288)))];
tensor<fp16, [512]> var_193_to_fp16 = const()[name = string("op_193_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56600640)))];
tensor<fp16, [1, ?, 512]> linear_4_cast_fp16 = linear(bias = var_193_to_fp16, weight = var_192_to_fp16, x = var_183_cast_fp16)[name = string("linear_4_cast_fp16")];
tensor<int32, [3]> concat_18 = const()[name = string("concat_18"), val = tensor<int32, [3]>([0, 0, 0])];
tensor<int32, [3]> concat_19 = const()[name = string("concat_19"), val = tensor<int32, [3]>([0, 1500, 0])];
tensor<int32, [3]> k_7_internal_tensor_assign_1_stride_0 = const()[name = string("k_7_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
tensor<bool, [3]> k_7_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
tensor<bool, [3]> k_7_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
tensor<bool, [3]> k_7_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
tensor<fp16, [1, 1500, 512]> k_7_to_fp16 = const()[name = string("k_7_to_fp16"), val = tensor<fp16, [1, 1500, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56601728)))];
tensor<fp16, [1, 1500, 512]> k_7_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_18, begin_mask = k_7_internal_tensor_assign_1_begin_mask_0, end = concat_19, end_mask = k_7_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_7_internal_tensor_assign_1_squeeze_mask_0, stride = k_7_internal_tensor_assign_1_stride_0, update = k_cache_3_cast_fp16, x = k_7_to_fp16)[name = string("k_7_internal_tensor_assign_1_cast_fp16")];
tensor<int32, [3]> concat_20 = const()[name = string("concat_20"), val = tensor<int32, [3]>([0, 0, 0])];
tensor<int32, [3]> concat_21 = const()[name = string("concat_21"), val = tensor<int32, [3]>([0, 1500, 0])];
tensor<int32, [3]> v_7_internal_tensor_assign_1_stride_0 = const()[name = string("v_7_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
tensor<bool, [3]> v_7_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
tensor<bool, [3]> v_7_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
tensor<bool, [3]> v_7_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
tensor<fp16, [1, 1500, 512]> v_7_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_20, begin_mask = v_7_internal_tensor_assign_1_begin_mask_0, end = concat_21, end_mask = v_7_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_7_internal_tensor_assign_1_squeeze_mask_0, stride = v_7_internal_tensor_assign_1_stride_0, update = v_cache_3_cast_fp16, x = k_7_to_fp16)[name = string("v_7_internal_tensor_assign_1_cast_fp16")];
tensor<int32, [4]> concat_22x = const()[name = string("concat_22x"), val = tensor<int32, [4]>([1, -1, 8, 64])];
tensor<fp16, [1, ?, 8, 64]> var_213_cast_fp16 = reshape(shape = concat_22x, x = linear_4_cast_fp16)[name = string("op_213_cast_fp16")];
tensor<fp16, [1, 1, 1, 1]> const_32_to_fp16 = const()[name = string("const_32_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
tensor<fp16, [1, ?, 8, 64]> q_7_cast_fp16 = mul(x = var_213_cast_fp16, y = const_32_to_fp16)[name = string("q_7_cast_fp16")];
tensor<int32, [4]> var_219 = const()[name = string("op_219"), val = tensor<int32, [4]>([1, 1500, 8, -1])];
tensor<fp16, [1, 1500, 8, 64]> var_220_cast_fp16 = reshape(shape = var_219, x = k_7_internal_tensor_assign_1_cast_fp16)[name = string("op_220_cast_fp16")];
tensor<fp16, [1, 1, 1, 1]> const_33_to_fp16 = const()[name = string("const_33_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
tensor<fp16, [1, 1500, 8, 64]> k_9_cast_fp16 = mul(x = var_220_cast_fp16, y = const_33_to_fp16)[name = string("k_9_cast_fp16")];
tensor<int32, [4]> var_226 = const()[name = string("op_226"), val = tensor<int32, [4]>([1, 1500, 8, -1])];
tensor<fp16, [1, 1500, 8, 64]> var_227_cast_fp16 = reshape(shape = var_226, x = v_7_internal_tensor_assign_1_cast_fp16)[name = string("op_227_cast_fp16")];
tensor<int32, [4]> var_228 = const()[name = string("op_228"), val = tensor<int32, [4]>([0, 2, 1, 3])];
bool qk_5_transpose_x_0 = const()[name = string("qk_5_transpose_x_0"), val = bool(false)];
bool qk_5_transpose_y_0 = const()[name = string("qk_5_transpose_y_0"), val = bool(false)];
tensor<int32, [4]> transpose_51_perm_0 = const()[name = string("transpose_51_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_52_perm_0 = const()[name = string("transpose_52_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 8, 64, 1500]> transpose_52 = transpose(perm = transpose_52_perm_0, x = k_9_cast_fp16)[name = string("transpose_114")];
tensor<fp16, [1, 8, ?, 64]> transpose_51 = transpose(perm = transpose_51_perm_0, x = q_7_cast_fp16)[name = string("transpose_115")];
tensor<fp16, [1, 8, ?, 1500]> qk_5_cast_fp16 = matmul(transpose_x = qk_5_transpose_x_0, transpose_y = qk_5_transpose_y_0, x = transpose_51, y = transpose_52)[name = string("qk_5_cast_fp16")];
tensor<fp16, [1, 8, ?, 1500]> var_232_cast_fp16 = softmax(axis = var_76, x = qk_5_cast_fp16)[name = string("op_232_cast_fp16")];
bool var_234_transpose_x_0 = const()[name = string("op_234_transpose_x_0"), val = bool(false)];
bool var_234_transpose_y_0 = const()[name = string("op_234_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 8, 1500, 64]> v_9_cast_fp16 = transpose(perm = var_228, x = var_227_cast_fp16)[name = string("transpose_116")];
tensor<fp16, [1, 8, ?, 64]> var_234_cast_fp16 = matmul(transpose_x = var_234_transpose_x_0, transpose_y = var_234_transpose_y_0, x = var_232_cast_fp16, y = v_9_cast_fp16)[name = string("op_234_cast_fp16")];
tensor<int32, [4]> var_235 = const()[name = string("op_235"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> concat_23x = const()[name = string("concat_23x"), val = tensor<int32, [3]>([1, -1, 512])];
tensor<fp16, [1, ?, 8, 64]> var_236_cast_fp16 = transpose(perm = var_235, x = var_234_cast_fp16)[name = string("transpose_113")];
tensor<fp16, [1, ?, 512]> x_13_cast_fp16 = reshape(shape = concat_23x, x = var_236_cast_fp16)[name = string("x_13_cast_fp16")];
tensor<fp16, [512, 512]> var_240_to_fp16 = const()[name = string("op_240_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58137792)))];
tensor<fp16, [512]> var_241_to_fp16 = const()[name = string("op_241_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58662144)))];
tensor<fp16, [1, ?, 512]> linear_5_cast_fp16 = linear(bias = var_241_to_fp16, weight = var_240_to_fp16, x = x_13_cast_fp16)[name = string("linear_5_cast_fp16")];
tensor<fp16, [1, ?, 512]> x_15_cast_fp16 = add(x = x_9_cast_fp16, y = linear_5_cast_fp16)[name = string("x_15_cast_fp16")];
tensor<int32, [1]> var_248_axes_0 = const()[name = string("op_248_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> blocks_0_mlp_ln_weight_to_fp16 = const()[name = string("blocks_0_mlp_ln_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58663232)))];
tensor<fp16, [512]> blocks_0_mlp_ln_bias_to_fp16 = const()[name = string("blocks_0_mlp_ln_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58664320)))];
tensor<fp16, [1, ?, 512]> var_248_cast_fp16 = layer_norm(axes = var_248_axes_0, beta = blocks_0_mlp_ln_bias_to_fp16, epsilon = var_82_to_fp16, gamma = blocks_0_mlp_ln_weight_to_fp16, x = x_15_cast_fp16)[name = string("op_248_cast_fp16")];
tensor<fp16, [2048, 512]> var_257_to_fp16 = const()[name = string("op_257_to_fp16"), val = tensor<fp16, [2048, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58665408)))];
tensor<fp16, [2048]> var_258_to_fp16 = const()[name = string("op_258_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60762624)))];
tensor<fp16, [1, ?, 2048]> linear_6_cast_fp16 = linear(bias = var_258_to_fp16, weight = var_257_to_fp16, x = var_248_cast_fp16)[name = string("linear_6_cast_fp16")];
string x_19_mode_0 = const()[name = string("x_19_mode_0"), val = string("EXACT")];
tensor<fp16, [1, ?, 2048]> x_19_cast_fp16 = gelu(mode = x_19_mode_0, x = linear_6_cast_fp16)[name = string("x_19_cast_fp16")];
tensor<fp16, [512, 2048]> var_263_to_fp16 = const()[name = string("op_263_to_fp16"), val = tensor<fp16, [512, 2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60766784)))];
tensor<fp16, [512]> var_264_to_fp16 = const()[name = string("op_264_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62864000)))];
tensor<fp16, [1, ?, 512]> linear_7_cast_fp16 = linear(bias = var_264_to_fp16, weight = var_263_to_fp16, x = x_19_cast_fp16)[name = string("linear_7_cast_fp16")];
tensor<fp16, [1, ?, 512]> x_21_cast_fp16 = add(x = x_15_cast_fp16, y = linear_7_cast_fp16)[name = string("x_21_cast_fp16")];
tensor<int32, [4]> k_cache_5_begin_0 = const()[name = string("k_cache_5_begin_0"), val = tensor<int32, [4]>([1, 0, 0, 0])];
tensor<int32, [4]> k_cache_5_end_0 = const()[name = string("k_cache_5_end_0"), val = tensor<int32, [4]>([2, 1, 448, 512])];
tensor<bool, [4]> k_cache_5_end_mask_0 = const()[name = string("k_cache_5_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<bool, [4]> k_cache_5_squeeze_mask_0 = const()[name = string("k_cache_5_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
tensor<fp16, [1, 448, 512]> k_cache_5_cast_fp16 = slice_by_index(begin = k_cache_5_begin_0, end = k_cache_5_end_0, end_mask = k_cache_5_end_mask_0, squeeze_mask = k_cache_5_squeeze_mask_0, x = coreml_update_state_12)[name = string("k_cache_5_cast_fp16")];
tensor<int32, [4]> v_cache_5_begin_0 = const()[name = string("v_cache_5_begin_0"), val = tensor<int32, [4]>([1, 0, 0, 0])];
tensor<int32, [4]> v_cache_5_end_0 = const()[name = string("v_cache_5_end_0"), val = tensor<int32, [4]>([2, 1, 448, 512])];
tensor<bool, [4]> v_cache_5_end_mask_0 = const()[name = string("v_cache_5_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<bool, [4]> v_cache_5_squeeze_mask_0 = const()[name = string("v_cache_5_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
tensor<fp16, [1, 448, 512]> v_cache_5_cast_fp16 = slice_by_index(begin = v_cache_5_begin_0, end = v_cache_5_end_0, end_mask = v_cache_5_end_mask_0, squeeze_mask = v_cache_5_squeeze_mask_0, x = coreml_update_state_13)[name = string("v_cache_5_cast_fp16")];
tensor<int32, [4]> k_cache_7_begin_0 = const()[name = string("k_cache_7_begin_0"), val = tensor<int32, [4]>([1, 0, 0, 0])];
tensor<int32, [4]> k_cache_7_end_0 = const()[name = string("k_cache_7_end_0"), val = tensor<int32, [4]>([2, 1, 1500, 512])];
tensor<bool, [4]> k_cache_7_end_mask_0 = const()[name = string("k_cache_7_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<bool, [4]> k_cache_7_squeeze_mask_0 = const()[name = string("k_cache_7_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
tensor<fp16, [1, 1500, 512]> k_cache_7_cast_fp16 = slice_by_index(begin = k_cache_7_begin_0, end = k_cache_7_end_0, end_mask = k_cache_7_end_mask_0, squeeze_mask = k_cache_7_squeeze_mask_0, x = read_state_2)[name = string("k_cache_7_cast_fp16")];
tensor<int32, [4]> v_cache_7_begin_0 = const()[name = string("v_cache_7_begin_0"), val = tensor<int32, [4]>([1, 0, 0, 0])];
tensor<int32, [4]> v_cache_7_end_0 = const()[name = string("v_cache_7_end_0"), val = tensor<int32, [4]>([2, 1, 1500, 512])];
tensor<bool, [4]> v_cache_7_end_mask_0 = const()[name = string("v_cache_7_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<bool, [4]> v_cache_7_squeeze_mask_0 = const()[name = string("v_cache_7_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
tensor<fp16, [1, 1500, 512]> v_cache_7_cast_fp16 = slice_by_index(begin = v_cache_7_begin_0, end = v_cache_7_end_0, end_mask = v_cache_7_end_mask_0, squeeze_mask = v_cache_7_squeeze_mask_0, x = read_state_3)[name = string("v_cache_7_cast_fp16")];
int32 var_287 = const()[name = string("op_287"), val = int32(-1)];
tensor<int32, [1]> var_305_axes_0 = const()[name = string("op_305_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> blocks_1_attn_ln_weight_to_fp16 = const()[name = string("blocks_1_attn_ln_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62865088)))];
tensor<fp16, [512]> blocks_1_attn_ln_bias_to_fp16 = const()[name = string("blocks_1_attn_ln_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62866176)))];
fp16 var_293_to_fp16 = const()[name = string("op_293_to_fp16"), val = fp16(0x1.5p-17)];
tensor<fp16, [1, ?, 512]> var_305_cast_fp16 = layer_norm(axes = var_305_axes_0, beta = blocks_1_attn_ln_bias_to_fp16, epsilon = var_293_to_fp16, gamma = blocks_1_attn_ln_weight_to_fp16, x = x_21_cast_fp16)[name = string("op_305_cast_fp16")];
tensor<fp16, [512, 512]> var_316_to_fp16 = const()[name = string("op_316_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62867264)))];
tensor<fp16, [512]> var_317_to_fp16 = const()[name = string("op_317_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63391616)))];
tensor<fp16, [1, ?, 512]> linear_8_cast_fp16 = linear(bias = var_317_to_fp16, weight = var_316_to_fp16, x = var_305_cast_fp16)[name = string("linear_8_cast_fp16")];
tensor<fp16, [512, 512]> var_320_to_fp16 = const()[name = string("op_320_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63392704)))];
tensor<fp16, [1, ?, 512]> linear_9_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_320_to_fp16, x = var_305_cast_fp16)[name = string("linear_9_cast_fp16")];
tensor<fp16, [512, 512]> var_324_to_fp16 = const()[name = string("op_324_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63917056)))];
tensor<fp16, [512]> var_325_to_fp16 = const()[name = string("op_325_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64441408)))];
tensor<fp16, [1, ?, 512]> linear_10_cast_fp16 = linear(bias = var_325_to_fp16, weight = var_324_to_fp16, x = var_305_cast_fp16)[name = string("linear_10_cast_fp16")];
tensor<int32, [3]> var_327_shape_cast_fp16 = shape(x = linear_8_cast_fp16)[name = string("op_327_shape_cast_fp16")];
int32 gather_14_axis_0 = const()[name = string("gather_14_axis_0"), val = int32(0)];
int32 gather_14_batch_dims_0 = const()[name = string("gather_14_batch_dims_0"), val = int32(0)];
bool gather_14_validate_indices_0 = const()[name = string("gather_14_validate_indices_0"), val = bool(false)];
string var_327_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_327_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
uint16 select_14_to_uint16 = const()[name = string("select_14_to_uint16"), val = uint16(1)];
tensor<uint16, [3]> var_327_shape_cast_fp16_to_uint16 = cast(dtype = var_327_shape_cast_fp16_to_uint16_dtype_0, x = var_327_shape_cast_fp16)[name = string("cast_76")];
uint16 gather_14_cast_uint16 = gather(axis = gather_14_axis_0, batch_dims = gather_14_batch_dims_0, indices = select_14_to_uint16, validate_indices = gather_14_validate_indices_0, x = var_327_shape_cast_fp16_to_uint16)[name = string("gather_14_cast_uint16")];
string gather_14_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_14_cast_uint16_to_int32_dtype_0"), val = string("int32")];
int32 gather_14_cast_uint16_to_int32 = cast(dtype = gather_14_cast_uint16_to_int32_dtype_0, x = gather_14_cast_uint16)[name = string("cast_75")];
int32 end_step_5 = add(x = offset, y = gather_14_cast_uint16_to_int32)[name = string("end_step_5")];
tensor<int32, [1]> expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_19_axes_0 = const()[name = string("expand_dims_19_axes_0"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_19 = expand_dims(axes = expand_dims_19_axes_0, x = end_step_5)[name = string("expand_dims_19")];
tensor<int32, [1]> concat_26_values0_0 = const()[name = string("concat_26_values0_0"), val = tensor<int32, [1]>([1])];
int32 concat_26_axis_0 = const()[name = string("concat_26_axis_0"), val = int32(0)];
bool concat_26_interleave_0 = const()[name = string("concat_26_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_26 = concat(axis = concat_26_axis_0, interleave = concat_26_interleave_0, values = (concat_26_values0_0, expand_dims_16, expand_dims_1, expand_dims_18))[name = string("concat_26")];
tensor<int32, [1]> concat_27_values0_0 = const()[name = string("concat_27_values0_0"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor<int32, [1]>([0])];
int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)];
bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (concat_27_values0_0, concat_27_values1_0, expand_dims_19, concat_27_values3_0))[name = string("concat_27")];
tensor<int32, [4]> k_cache1_internal_tensor_assign_2_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_2_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
tensor<bool, [4]> k_cache1_internal_tensor_assign_2_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<bool, [4]> k_cache1_internal_tensor_assign_2_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
tensor<bool, [4]> k_cache1_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
tensor<fp16, [6, 1, 448, 512]> k_cache1_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_26, begin_mask = k_cache1_internal_tensor_assign_2_begin_mask_0, end = concat_27, end_mask = k_cache1_internal_tensor_assign_2_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_2_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_2_stride_0, update = linear_9_cast_fp16, x = coreml_update_state_12)[name = string("k_cache1_internal_tensor_assign_2_cast_fp16")];
write_state(data = k_cache1_internal_tensor_assign_2_cast_fp16, input = k_cache1)[name = string("coreml_update_state_14_write_state")];
tensor<fp16, [6, 1, 448, 512]> coreml_update_state_14 = read_state(input = k_cache1)[name = string("coreml_update_state_14")];
tensor<int32, [4]> v_cache1_internal_tensor_assign_2_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_2_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
tensor<bool, [4]> v_cache1_internal_tensor_assign_2_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<bool, [4]> v_cache1_internal_tensor_assign_2_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
tensor<bool, [4]> v_cache1_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
tensor<fp16, [6, 1, 448, 512]> v_cache1_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_26, begin_mask = v_cache1_internal_tensor_assign_2_begin_mask_0, end = concat_27, end_mask = v_cache1_internal_tensor_assign_2_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_2_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_2_stride_0, update = linear_10_cast_fp16, x = coreml_update_state_13)[name = string("v_cache1_internal_tensor_assign_2_cast_fp16")];
write_state(data = v_cache1_internal_tensor_assign_2_cast_fp16, input = v_cache1)[name = string("coreml_update_state_15_write_state")];
tensor<fp16, [6, 1, 448, 512]> coreml_update_state_15 = read_state(input = v_cache1)[name = string("coreml_update_state_15")];
int32 concat_32_values0_0 = const()[name = string("concat_32_values0_0"), val = int32(1)];
int32 concat_32_values2_0 = const()[name = string("concat_32_values2_0"), val = int32(512)];
int32 concat_32_axis_0 = const()[name = string("concat_32_axis_0"), val = int32(0)];
bool concat_32_interleave_0 = const()[name = string("concat_32_interleave_0"), val = bool(false)];
tensor<int32, [3]> concat_32 = concat(axis = concat_32_axis_0, interleave = concat_32_interleave_0, values = (concat_32_values0_0, end_step_5, concat_32_values2_0))[name = string("concat_32")];
tensor<int32, [3]> var_343_begin_0 = const()[name = string("op_343_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
tensor<bool, [3]> var_343_end_mask_0 = const()[name = string("op_343_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
tensor<fp16, [1, ?, 512]> var_343_cast_fp16 = slice_by_index(begin = var_343_begin_0, end = concat_32, end_mask = var_343_end_mask_0, x = k_cache_5_cast_fp16)[name = string("op_343_cast_fp16")];
tensor<int32, [3]> var_346_begin_0 = const()[name = string("op_346_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
tensor<bool, [3]> var_346_end_mask_0 = const()[name = string("op_346_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
tensor<fp16, [1, ?, 512]> var_346_cast_fp16 = slice_by_index(begin = var_346_begin_0, end = concat_32, end_mask = var_346_end_mask_0, x = v_cache_5_cast_fp16)[name = string("op_346_cast_fp16")];
tensor<int32, [4]> concat_34x = const()[name = string("concat_34x"), val = tensor<int32, [4]>([1, -1, 8, 64])];
tensor<fp16, [1, ?, 8, 64]> var_356_cast_fp16 = reshape(shape = concat_34x, x = linear_8_cast_fp16)[name = string("op_356_cast_fp16")];
tensor<fp16, [1, 1, 1, 1]> const_34_to_fp16 = const()[name = string("const_34_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
tensor<fp16, [1, ?, 8, 64]> q_11_cast_fp16 = mul(x = var_356_cast_fp16, y = const_34_to_fp16)[name = string("q_11_cast_fp16")];
tensor<int32, [4]> concat_35x = const()[name = string("concat_35x"), val = tensor<int32, [4]>([1, -1, 8, 64])];
tensor<fp16, [1, ?, 8, 64]> var_363_cast_fp16 = reshape(shape = concat_35x, x = var_343_cast_fp16)[name = string("op_363_cast_fp16")];
tensor<fp16, [1, 1, 1, 1]> const_35_to_fp16 = const()[name = string("const_35_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
tensor<fp16, [1, ?, 8, 64]> k_15_cast_fp16 = mul(x = var_363_cast_fp16, y = const_35_to_fp16)[name = string("k_15_cast_fp16")];
tensor<int32, [4]> concat_36x = const()[name = string("concat_36x"), val = tensor<int32, [4]>([1, -1, 8, 64])];
tensor<fp16, [1, ?, 8, 64]> var_370_cast_fp16 = reshape(shape = concat_36x, x = var_346_cast_fp16)[name = string("op_370_cast_fp16")];
tensor<int32, [4]> var_371 = const()[name = string("op_371"), val = tensor<int32, [4]>([0, 2, 1, 3])];
bool qk_7_transpose_x_0 = const()[name = string("qk_7_transpose_x_0"), val = bool(false)];
bool qk_7_transpose_y_0 = const()[name = string("qk_7_transpose_y_0"), val = bool(false)];
tensor<int32, [4]> transpose_53_perm_0 = const()[name = string("transpose_53_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_54_perm_0 = const()[name = string("transpose_54_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 8, 64, ?]> transpose_54 = transpose(perm = transpose_54_perm_0, x = k_15_cast_fp16)[name = string("transpose_110")];
tensor<fp16, [1, 8, ?, 64]> transpose_53 = transpose(perm = transpose_53_perm_0, x = q_11_cast_fp16)[name = string("transpose_111")];
tensor<fp16, [1, 8, ?, ?]> qk_7_cast_fp16 = matmul(transpose_x = qk_7_transpose_x_0, transpose_y = qk_7_transpose_y_0, x = transpose_53, y = transpose_54)[name = string("qk_7_cast_fp16")];
int32 concat_37_values1_0 = const()[name = string("concat_37_values1_0"), val = int32(448)];
int32 concat_37_axis_0 = const()[name = string("concat_37_axis_0"), val = int32(0)];
bool concat_37_interleave_0 = const()[name = string("concat_37_interleave_0"), val = bool(false)];
tensor<int32, [2]> concat_37 = concat(axis = concat_37_axis_0, interleave = concat_37_interleave_0, values = (gather_14_cast_uint16_to_int32, concat_37_values1_0))[name = string("concat_37")];
tensor<int32, [2]> var_374_begin_0 = const()[name = string("op_374_begin_0"), val = tensor<int32, [2]>([0, 0])];
tensor<bool, [2]> var_374_end_mask_0 = const()[name = string("op_374_end_mask_0"), val = tensor<bool, [2]>([false, true])];
tensor<fp16, [?, 448]> var_374_cast_fp16 = slice_by_index(begin = var_374_begin_0, end = concat_37, end_mask = var_374_end_mask_0, x = mask_to_fp16)[name = string("op_374_cast_fp16")];
int32 concat_38_values0_0 = const()[name = string("concat_38_values0_0"), val = int32(0)];
int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)];
bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)];
tensor<int32, [2]> concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (concat_38_values0_0, gather_14_cast_uint16_to_int32))[name = string("concat_38")];
tensor<int32, [2]> var_375_begin_0 = const()[name = string("op_375_begin_0"), val = tensor<int32, [2]>([0, 0])];
tensor<bool, [2]> var_375_end_mask_0 = const()[name = string("op_375_end_mask_0"), val = tensor<bool, [2]>([true, false])];
tensor<fp16, [?, ?]> var_375_cast_fp16 = slice_by_index(begin = var_375_begin_0, end = concat_38, end_mask = var_375_end_mask_0, x = var_374_cast_fp16)[name = string("op_375_cast_fp16")];
tensor<fp16, [1, 8, ?, ?]> qk_9_cast_fp16 = add(x = qk_7_cast_fp16, y = var_375_cast_fp16)[name = string("qk_9_cast_fp16")];
tensor<fp16, [1, 8, ?, ?]> var_378_cast_fp16 = softmax(axis = var_287, x = qk_9_cast_fp16)[name = string("op_378_cast_fp16")];
bool var_380_transpose_x_0 = const()[name = string("op_380_transpose_x_0"), val = bool(false)];
bool var_380_transpose_y_0 = const()[name = string("op_380_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 8, ?, 64]> v_15_cast_fp16 = transpose(perm = var_371, x = var_370_cast_fp16)[name = string("transpose_112")];
tensor<fp16, [1, 8, ?, 64]> var_380_cast_fp16 = matmul(transpose_x = var_380_transpose_x_0, transpose_y = var_380_transpose_y_0, x = var_378_cast_fp16, y = v_15_cast_fp16)[name = string("op_380_cast_fp16")];
tensor<int32, [4]> var_381 = const()[name = string("op_381"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> concat_39x = const()[name = string("concat_39x"), val = tensor<int32, [3]>([1, -1, 512])];
tensor<fp16, [1, ?, 8, 64]> var_382_cast_fp16 = transpose(perm = var_381, x = var_380_cast_fp16)[name = string("transpose_109")];
tensor<fp16, [1, ?, 512]> x_25_cast_fp16 = reshape(shape = concat_39x, x = var_382_cast_fp16)[name = string("x_25_cast_fp16")];
tensor<fp16, [512, 512]> var_386_to_fp16 = const()[name = string("op_386_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64442496)))];
tensor<fp16, [512]> var_387_to_fp16 = const()[name = string("op_387_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64966848)))];
tensor<fp16, [1, ?, 512]> linear_11_cast_fp16 = linear(bias = var_387_to_fp16, weight = var_386_to_fp16, x = x_25_cast_fp16)[name = string("linear_11_cast_fp16")];
tensor<fp16, [1, ?, 512]> x_27_cast_fp16 = add(x = x_21_cast_fp16, y = linear_11_cast_fp16)[name = string("x_27_cast_fp16")];
tensor<int32, [1]> var_394_axes_0 = const()[name = string("op_394_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> blocks_1_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_1_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64967936)))];
tensor<fp16, [512]> blocks_1_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_1_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64969024)))];
tensor<fp16, [1, ?, 512]> var_394_cast_fp16 = layer_norm(axes = var_394_axes_0, beta = blocks_1_cross_attn_ln_bias_to_fp16, epsilon = var_293_to_fp16, gamma = blocks_1_cross_attn_ln_weight_to_fp16, x = x_27_cast_fp16)[name = string("op_394_cast_fp16")];
tensor<fp16, [512, 512]> var_403_to_fp16 = const()[name = string("op_403_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64970112)))];
tensor<fp16, [512]> var_404_to_fp16 = const()[name = string("op_404_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65494464)))];
tensor<fp16, [1, ?, 512]> linear_12_cast_fp16 = linear(bias = var_404_to_fp16, weight = var_403_to_fp16, x = var_394_cast_fp16)[name = string("linear_12_cast_fp16")];
tensor<int32, [3]> concat_40 = const()[name = string("concat_40"), val = tensor<int32, [3]>([0, 0, 0])];
tensor<int32, [3]> concat_41 = const()[name = string("concat_41"), val = tensor<int32, [3]>([0, 1500, 0])];
tensor<int32, [3]> k_17_internal_tensor_assign_1_stride_0 = const()[name = string("k_17_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
tensor<bool, [3]> k_17_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
tensor<bool, [3]> k_17_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
tensor<bool, [3]> k_17_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
tensor<fp16, [1, 1500, 512]> k_17_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_40, begin_mask = k_17_internal_tensor_assign_1_begin_mask_0, end = concat_41, end_mask = k_17_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_17_internal_tensor_assign_1_squeeze_mask_0, stride = k_17_internal_tensor_assign_1_stride_0, update = k_cache_7_cast_fp16, x = k_7_to_fp16)[name = string("k_17_internal_tensor_assign_1_cast_fp16")];
tensor<int32, [3]> concat_42 = const()[name = string("concat_42"), val = tensor<int32, [3]>([0, 0, 0])];
tensor<int32, [3]> concat_43 = const()[name = string("concat_43"), val = tensor<int32, [3]>([0, 1500, 0])];
tensor<int32, [3]> v_17_internal_tensor_assign_1_stride_0 = const()[name = string("v_17_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
tensor<bool, [3]> v_17_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
tensor<bool, [3]> v_17_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
tensor<bool, [3]> v_17_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
tensor<fp16, [1, 1500, 512]> v_17_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_42, begin_mask = v_17_internal_tensor_assign_1_begin_mask_0, end = concat_43, end_mask = v_17_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_17_internal_tensor_assign_1_squeeze_mask_0, stride = v_17_internal_tensor_assign_1_stride_0, update = v_cache_7_cast_fp16, x = k_7_to_fp16)[name = string("v_17_internal_tensor_assign_1_cast_fp16")];
tensor<int32, [4]> concat_44x = const()[name = string("concat_44x"), val = tensor<int32, [4]>([1, -1, 8, 64])];
tensor<fp16, [1, ?, 8, 64]> var_424_cast_fp16 = reshape(shape = concat_44x, x = linear_12_cast_fp16)[name = string("op_424_cast_fp16")];
tensor<fp16, [1, 1, 1, 1]> const_36_to_fp16 = const()[name = string("const_36_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
tensor<fp16, [1, ?, 8, 64]> q_15_cast_fp16 = mul(x = var_424_cast_fp16, y = const_36_to_fp16)[name = string("q_15_cast_fp16")];
tensor<int32, [4]> var_430 = const()[name = string("op_430"), val = tensor<int32, [4]>([1, 1500, 8, -1])];
tensor<fp16, [1, 1500, 8, 64]> var_431_cast_fp16 = reshape(shape = var_430, x = k_17_internal_tensor_assign_1_cast_fp16)[name = string("op_431_cast_fp16")];
tensor<fp16, [1, 1, 1, 1]> const_37_to_fp16 = const()[name = string("const_37_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
tensor<fp16, [1, 1500, 8, 64]> k_19_cast_fp16 = mul(x = var_431_cast_fp16, y = const_37_to_fp16)[name = string("k_19_cast_fp16")];
tensor<int32, [4]> var_437 = const()[name = string("op_437"), val = tensor<int32, [4]>([1, 1500, 8, -1])];
tensor<fp16, [1, 1500, 8, 64]> var_438_cast_fp16 = reshape(shape = var_437, x = v_17_internal_tensor_assign_1_cast_fp16)[name = string("op_438_cast_fp16")];
tensor<int32, [4]> var_439 = const()[name = string("op_439"), val = tensor<int32, [4]>([0, 2, 1, 3])];
bool qk_11_transpose_x_0 = const()[name = string("qk_11_transpose_x_0"), val = bool(false)];
bool qk_11_transpose_y_0 = const()[name = string("qk_11_transpose_y_0"), val = bool(false)];
tensor<int32, [4]> transpose_55_perm_0 = const()[name = string("transpose_55_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_56_perm_0 = const()[name = string("transpose_56_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 8, 64, 1500]> transpose_56 = transpose(perm = transpose_56_perm_0, x = k_19_cast_fp16)[name = string("transpose_106")];
tensor<fp16, [1, 8, ?, 64]> transpose_55 = transpose(perm = transpose_55_perm_0, x = q_15_cast_fp16)[name = string("transpose_107")];
tensor<fp16, [1, 8, ?, 1500]> qk_11_cast_fp16 = matmul(transpose_x = qk_11_transpose_x_0, transpose_y = qk_11_transpose_y_0, x = transpose_55, y = transpose_56)[name = string("qk_11_cast_fp16")];
tensor<fp16, [1, 8, ?, 1500]> var_443_cast_fp16 = softmax(axis = var_287, x = qk_11_cast_fp16)[name = string("op_443_cast_fp16")];
bool var_445_transpose_x_0 = const()[name = string("op_445_transpose_x_0"), val = bool(false)];
bool var_445_transpose_y_0 = const()[name = string("op_445_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 8, 1500, 64]> v_19_cast_fp16 = transpose(perm = var_439, x = var_438_cast_fp16)[name = string("transpose_108")];
tensor<fp16, [1, 8, ?, 64]> var_445_cast_fp16 = matmul(transpose_x = var_445_transpose_x_0, transpose_y = var_445_transpose_y_0, x = var_443_cast_fp16, y = v_19_cast_fp16)[name = string("op_445_cast_fp16")];
tensor<int32, [4]> var_446 = const()[name = string("op_446"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> concat_45x = const()[name = string("concat_45x"), val = tensor<int32, [3]>([1, -1, 512])];
tensor<fp16, [1, ?, 8, 64]> var_447_cast_fp16 = transpose(perm = var_446, x = var_445_cast_fp16)[name = string("transpose_105")];
tensor<fp16, [1, ?, 512]> x_31_cast_fp16 = reshape(shape = concat_45x, x = var_447_cast_fp16)[name = string("x_31_cast_fp16")];
tensor<fp16, [512, 512]> var_451_to_fp16 = const()[name = string("op_451_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65495552)))];
tensor<fp16, [512]> var_452_to_fp16 = const()[name = string("op_452_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66019904)))];
tensor<fp16, [1, ?, 512]> linear_13_cast_fp16 = linear(bias = var_452_to_fp16, weight = var_451_to_fp16, x = x_31_cast_fp16)[name = string("linear_13_cast_fp16")];
tensor<fp16, [1, ?, 512]> x_33_cast_fp16 = add(x = x_27_cast_fp16, y = linear_13_cast_fp16)[name = string("x_33_cast_fp16")];
tensor<int32, [1]> var_459_axes_0 = const()[name = string("op_459_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> blocks_1_mlp_ln_weight_to_fp16 = const()[name = string("blocks_1_mlp_ln_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66020992)))];
tensor<fp16, [512]> blocks_1_mlp_ln_bias_to_fp16 = const()[name = string("blocks_1_mlp_ln_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66022080)))];
tensor<fp16, [1, ?, 512]> var_459_cast_fp16 = layer_norm(axes = var_459_axes_0, beta = blocks_1_mlp_ln_bias_to_fp16, epsilon = var_293_to_fp16, gamma = blocks_1_mlp_ln_weight_to_fp16, x = x_33_cast_fp16)[name = string("op_459_cast_fp16")];
tensor<fp16, [2048, 512]> var_468_to_fp16 = const()[name = string("op_468_to_fp16"), val = tensor<fp16, [2048, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66023168)))];
tensor<fp16, [2048]> var_469_to_fp16 = const()[name = string("op_469_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68120384)))];
tensor<fp16, [1, ?, 2048]> linear_14_cast_fp16 = linear(bias = var_469_to_fp16, weight = var_468_to_fp16, x = var_459_cast_fp16)[name = string("linear_14_cast_fp16")];
string x_37_mode_0 = const()[name = string("x_37_mode_0"), val = string("EXACT")];
tensor<fp16, [1, ?, 2048]> x_37_cast_fp16 = gelu(mode = x_37_mode_0, x = linear_14_cast_fp16)[name = string("x_37_cast_fp16")];
tensor<fp16, [512, 2048]> var_474_to_fp16 = const()[name = string("op_474_to_fp16"), val = tensor<fp16, [512, 2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68124544)))];
tensor<fp16, [512]> var_475_to_fp16 = const()[name = string("op_475_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70221760)))];
tensor<fp16, [1, ?, 512]> linear_15_cast_fp16 = linear(bias = var_475_to_fp16, weight = var_474_to_fp16, x = x_37_cast_fp16)[name = string("linear_15_cast_fp16")];
tensor<fp16, [1, ?, 512]> x_39_cast_fp16 = add(x = x_33_cast_fp16, y = linear_15_cast_fp16)[name = string("x_39_cast_fp16")];
tensor<int32, [4]> k_cache_9_begin_0 = const()[name = string("k_cache_9_begin_0"), val = tensor<int32, [4]>([2, 0, 0, 0])];
tensor<int32, [4]> k_cache_9_end_0 = const()[name = string("k_cache_9_end_0"), val = tensor<int32, [4]>([3, 1, 448, 512])];
tensor<bool, [4]> k_cache_9_end_mask_0 = const()[name = string("k_cache_9_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<bool, [4]> k_cache_9_squeeze_mask_0 = const()[name = string("k_cache_9_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
tensor<fp16, [1, 448, 512]> k_cache_9_cast_fp16 = slice_by_index(begin = k_cache_9_begin_0, end = k_cache_9_end_0, end_mask = k_cache_9_end_mask_0, squeeze_mask = k_cache_9_squeeze_mask_0, x = coreml_update_state_14)[name = string("k_cache_9_cast_fp16")];
tensor<int32, [4]> v_cache_9_begin_0 = const()[name = string("v_cache_9_begin_0"), val = tensor<int32, [4]>([2, 0, 0, 0])];
tensor<int32, [4]> v_cache_9_end_0 = const()[name = string("v_cache_9_end_0"), val = tensor<int32, [4]>([3, 1, 448, 512])];
tensor<bool, [4]> v_cache_9_end_mask_0 = const()[name = string("v_cache_9_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<bool, [4]> v_cache_9_squeeze_mask_0 = const()[name = string("v_cache_9_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
tensor<fp16, [1, 448, 512]> v_cache_9_cast_fp16 = slice_by_index(begin = v_cache_9_begin_0, end = v_cache_9_end_0, end_mask = v_cache_9_end_mask_0, squeeze_mask = v_cache_9_squeeze_mask_0, x = coreml_update_state_15)[name = string("v_cache_9_cast_fp16")];
tensor<int32, [4]> k_cache_11_begin_0 = const()[name = string("k_cache_11_begin_0"), val = tensor<int32, [4]>([2, 0, 0, 0])];
tensor<int32, [4]> k_cache_11_end_0 = const()[name = string("k_cache_11_end_0"), val = tensor<int32, [4]>([3, 1, 1500, 512])];
tensor<bool, [4]> k_cache_11_end_mask_0 = const()[name = string("k_cache_11_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<bool, [4]> k_cache_11_squeeze_mask_0 = const()[name = string("k_cache_11_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
tensor<fp16, [1, 1500, 512]> k_cache_11_cast_fp16 = slice_by_index(begin = k_cache_11_begin_0, end = k_cache_11_end_0, end_mask = k_cache_11_end_mask_0, squeeze_mask = k_cache_11_squeeze_mask_0, x = read_state_2)[name = string("k_cache_11_cast_fp16")];
tensor<int32, [4]> v_cache_11_begin_0 = const()[name = string("v_cache_11_begin_0"), val = tensor<int32, [4]>([2, 0, 0, 0])];
tensor<int32, [4]> v_cache_11_end_0 = const()[name = string("v_cache_11_end_0"), val = tensor<int32, [4]>([3, 1, 1500, 512])];
tensor<bool, [4]> v_cache_11_end_mask_0 = const()[name = string("v_cache_11_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<bool, [4]> v_cache_11_squeeze_mask_0 = const()[name = string("v_cache_11_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
tensor<fp16, [1, 1500, 512]> v_cache_11_cast_fp16 = slice_by_index(begin = v_cache_11_begin_0, end = v_cache_11_end_0, end_mask = v_cache_11_end_mask_0, squeeze_mask = v_cache_11_squeeze_mask_0, x = read_state_3)[name = string("v_cache_11_cast_fp16")];
int32 var_498 = const()[name = string("op_498"), val = int32(-1)];
tensor<int32, [1]> var_516_axes_0 = const()[name = string("op_516_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> blocks_2_attn_ln_weight_to_fp16 = const()[name = string("blocks_2_attn_ln_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70222848)))];
tensor<fp16, [512]> blocks_2_attn_ln_bias_to_fp16 = const()[name = string("blocks_2_attn_ln_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70223936)))];
fp16 var_504_to_fp16 = const()[name = string("op_504_to_fp16"), val = fp16(0x1.5p-17)];
tensor<fp16, [1, ?, 512]> var_516_cast_fp16 = layer_norm(axes = var_516_axes_0, beta = blocks_2_attn_ln_bias_to_fp16, epsilon = var_504_to_fp16, gamma = blocks_2_attn_ln_weight_to_fp16, x = x_39_cast_fp16)[name = string("op_516_cast_fp16")];
tensor<fp16, [512, 512]> var_527_to_fp16 = const()[name = string("op_527_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70225024)))];
tensor<fp16, [512]> var_528_to_fp16 = const()[name = string("op_528_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70749376)))];
tensor<fp16, [1, ?, 512]> linear_16_cast_fp16 = linear(bias = var_528_to_fp16, weight = var_527_to_fp16, x = var_516_cast_fp16)[name = string("linear_16_cast_fp16")];
tensor<fp16, [512, 512]> var_531_to_fp16 = const()[name = string("op_531_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70750464)))];
tensor<fp16, [1, ?, 512]> linear_17_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_531_to_fp16, x = var_516_cast_fp16)[name = string("linear_17_cast_fp16")];
tensor<fp16, [512, 512]> var_535_to_fp16 = const()[name = string("op_535_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71274816)))];
tensor<fp16, [512]> var_536_to_fp16 = const()[name = string("op_536_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71799168)))];
tensor<fp16, [1, ?, 512]> linear_18_cast_fp16 = linear(bias = var_536_to_fp16, weight = var_535_to_fp16, x = var_516_cast_fp16)[name = string("linear_18_cast_fp16")];
tensor<int32, [3]> var_538_shape_cast_fp16 = shape(x = linear_16_cast_fp16)[name = string("op_538_shape_cast_fp16")];
int32 gather_26_axis_0 = const()[name = string("gather_26_axis_0"), val = int32(0)];
int32 gather_26_batch_dims_0 = const()[name = string("gather_26_batch_dims_0"), val = int32(0)];
bool gather_26_validate_indices_0 = const()[name = string("gather_26_validate_indices_0"), val = bool(false)];
string var_538_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_538_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
uint16 select_26_to_uint16 = const()[name = string("select_26_to_uint16"), val = uint16(1)];
tensor<uint16, [3]> var_538_shape_cast_fp16_to_uint16 = cast(dtype = var_538_shape_cast_fp16_to_uint16_dtype_0, x = var_538_shape_cast_fp16)[name = string("cast_74")];
uint16 gather_26_cast_uint16 = gather(axis = gather_26_axis_0, batch_dims = gather_26_batch_dims_0, indices = select_26_to_uint16, validate_indices = gather_26_validate_indices_0, x = var_538_shape_cast_fp16_to_uint16)[name = string("gather_26_cast_uint16")];
string gather_26_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_26_cast_uint16_to_int32_dtype_0"), val = string("int32")];
int32 gather_26_cast_uint16_to_int32 = cast(dtype = gather_26_cast_uint16_to_int32_dtype_0, x = gather_26_cast_uint16)[name = string("cast_73")];
int32 end_step_7 = add(x = offset, y = gather_26_cast_uint16_to_int32)[name = string("end_step_7")];
tensor<int32, [1]> expand_dims_32 = const()[name = string("expand_dims_32"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_35_axes_0 = const()[name = string("expand_dims_35_axes_0"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_35 = expand_dims(axes = expand_dims_35_axes_0, x = end_step_7)[name = string("expand_dims_35")];
tensor<int32, [1]> concat_48_values0_0 = const()[name = string("concat_48_values0_0"), val = tensor<int32, [1]>([2])];
int32 concat_48_axis_0 = const()[name = string("concat_48_axis_0"), val = int32(0)];
bool concat_48_interleave_0 = const()[name = string("concat_48_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_48 = concat(axis = concat_48_axis_0, interleave = concat_48_interleave_0, values = (concat_48_values0_0, expand_dims_32, expand_dims_1, expand_dims_34))[name = string("concat_48")];
tensor<int32, [1]> concat_49_values0_0 = const()[name = string("concat_49_values0_0"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> concat_49_values1_0 = const()[name = string("concat_49_values1_0"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> concat_49_values3_0 = const()[name = string("concat_49_values3_0"), val = tensor<int32, [1]>([0])];
int32 concat_49_axis_0 = const()[name = string("concat_49_axis_0"), val = int32(0)];
bool concat_49_interleave_0 = const()[name = string("concat_49_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_49 = concat(axis = concat_49_axis_0, interleave = concat_49_interleave_0, values = (concat_49_values0_0, concat_49_values1_0, expand_dims_35, concat_49_values3_0))[name = string("concat_49")];
tensor<int32, [4]> k_cache1_internal_tensor_assign_3_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_3_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
tensor<bool, [4]> k_cache1_internal_tensor_assign_3_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<bool, [4]> k_cache1_internal_tensor_assign_3_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
tensor<bool, [4]> k_cache1_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
tensor<fp16, [6, 1, 448, 512]> k_cache1_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_48, begin_mask = k_cache1_internal_tensor_assign_3_begin_mask_0, end = concat_49, end_mask = k_cache1_internal_tensor_assign_3_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_3_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_3_stride_0, update = linear_17_cast_fp16, x = coreml_update_state_14)[name = string("k_cache1_internal_tensor_assign_3_cast_fp16")];
write_state(data = k_cache1_internal_tensor_assign_3_cast_fp16, input = k_cache1)[name = string("coreml_update_state_16_write_state")];
tensor<fp16, [6, 1, 448, 512]> coreml_update_state_16 = read_state(input = k_cache1)[name = string("coreml_update_state_16")];
tensor<int32, [4]> v_cache1_internal_tensor_assign_3_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_3_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
tensor<bool, [4]> v_cache1_internal_tensor_assign_3_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<bool, [4]> v_cache1_internal_tensor_assign_3_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
tensor<bool, [4]> v_cache1_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
tensor<fp16, [6, 1, 448, 512]> v_cache1_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_48, begin_mask = v_cache1_internal_tensor_assign_3_begin_mask_0, end = concat_49, end_mask = v_cache1_internal_tensor_assign_3_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_3_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_3_stride_0, update = linear_18_cast_fp16, x = coreml_update_state_15)[name = string("v_cache1_internal_tensor_assign_3_cast_fp16")];
write_state(data = v_cache1_internal_tensor_assign_3_cast_fp16, input = v_cache1)[name = string("coreml_update_state_17_write_state")];
tensor<fp16, [6, 1, 448, 512]> coreml_update_state_17 = read_state(input = v_cache1)[name = string("coreml_update_state_17")];
int32 concat_54_values0_0 = const()[name = string("concat_54_values0_0"), val = int32(1)];
int32 concat_54_values2_0 = const()[name = string("concat_54_values2_0"), val = int32(512)];
int32 concat_54_axis_0 = const()[name = string("concat_54_axis_0"), val = int32(0)];
bool concat_54_interleave_0 = const()[name = string("concat_54_interleave_0"), val = bool(false)];
tensor<int32, [3]> concat_54 = concat(axis = concat_54_axis_0, interleave = concat_54_interleave_0, values = (concat_54_values0_0, end_step_7, concat_54_values2_0))[name = string("concat_54")];
tensor<int32, [3]> var_554_begin_0 = const()[name = string("op_554_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
tensor<bool, [3]> var_554_end_mask_0 = const()[name = string("op_554_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
tensor<fp16, [1, ?, 512]> var_554_cast_fp16 = slice_by_index(begin = var_554_begin_0, end = concat_54, end_mask = var_554_end_mask_0, x = k_cache_9_cast_fp16)[name = string("op_554_cast_fp16")];
tensor<int32, [3]> var_557_begin_0 = const()[name = string("op_557_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
tensor<bool, [3]> var_557_end_mask_0 = const()[name = string("op_557_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
tensor<fp16, [1, ?, 512]> var_557_cast_fp16 = slice_by_index(begin = var_557_begin_0, end = concat_54, end_mask = var_557_end_mask_0, x = v_cache_9_cast_fp16)[name = string("op_557_cast_fp16")];
tensor<int32, [4]> concat_56x = const()[name = string("concat_56x"), val = tensor<int32, [4]>([1, -1, 8, 64])];
tensor<fp16, [1, ?, 8, 64]> var_567_cast_fp16 = reshape(shape = concat_56x, x = linear_16_cast_fp16)[name = string("op_567_cast_fp16")];
tensor<fp16, [1, 1, 1, 1]> const_38_to_fp16 = const()[name = string("const_38_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
tensor<fp16, [1, ?, 8, 64]> q_19_cast_fp16 = mul(x = var_567_cast_fp16, y = const_38_to_fp16)[name = string("q_19_cast_fp16")];
tensor<int32, [4]> concat_57x = const()[name = string("concat_57x"), val = tensor<int32, [4]>([1, -1, 8, 64])];
tensor<fp16, [1, ?, 8, 64]> var_574_cast_fp16 = reshape(shape = concat_57x, x = var_554_cast_fp16)[name = string("op_574_cast_fp16")];
tensor<fp16, [1, 1, 1, 1]> const_39_to_fp16 = const()[name = string("const_39_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
tensor<fp16, [1, ?, 8, 64]> k_25_cast_fp16 = mul(x = var_574_cast_fp16, y = const_39_to_fp16)[name = string("k_25_cast_fp16")];
tensor<int32, [4]> concat_58x = const()[name = string("concat_58x"), val = tensor<int32, [4]>([1, -1, 8, 64])];
tensor<fp16, [1, ?, 8, 64]> var_581_cast_fp16 = reshape(shape = concat_58x, x = var_557_cast_fp16)[name = string("op_581_cast_fp16")];
tensor<int32, [4]> var_582 = const()[name = string("op_582"), val = tensor<int32, [4]>([0, 2, 1, 3])];
bool qk_13_transpose_x_0 = const()[name = string("qk_13_transpose_x_0"), val = bool(false)];
bool qk_13_transpose_y_0 = const()[name = string("qk_13_transpose_y_0"), val = bool(false)];
tensor<int32, [4]> transpose_57_perm_0 = const()[name = string("transpose_57_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_58_perm_0 = const()[name = string("transpose_58_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 8, 64, ?]> transpose_58 = transpose(perm = transpose_58_perm_0, x = k_25_cast_fp16)[name = string("transpose_102")];
tensor<fp16, [1, 8, ?, 64]> transpose_57 = transpose(perm = transpose_57_perm_0, x = q_19_cast_fp16)[name = string("transpose_103")];
tensor<fp16, [1, 8, ?, ?]> qk_13_cast_fp16 = matmul(transpose_x = qk_13_transpose_x_0, transpose_y = qk_13_transpose_y_0, x = transpose_57, y = transpose_58)[name = string("qk_13_cast_fp16")];
int32 concat_59_values1_0 = const()[name = string("concat_59_values1_0"), val = int32(448)];
int32 concat_59_axis_0 = const()[name = string("concat_59_axis_0"), val = int32(0)];
bool concat_59_interleave_0 = const()[name = string("concat_59_interleave_0"), val = bool(false)];
tensor<int32, [2]> concat_59 = concat(axis = concat_59_axis_0, interleave = concat_59_interleave_0, values = (gather_26_cast_uint16_to_int32, concat_59_values1_0))[name = string("concat_59")];
tensor<int32, [2]> var_585_begin_0 = const()[name = string("op_585_begin_0"), val = tensor<int32, [2]>([0, 0])];
tensor<bool, [2]> var_585_end_mask_0 = const()[name = string("op_585_end_mask_0"), val = tensor<bool, [2]>([false, true])];
tensor<fp16, [?, 448]> var_585_cast_fp16 = slice_by_index(begin = var_585_begin_0, end = concat_59, end_mask = var_585_end_mask_0, x = mask_to_fp16)[name = string("op_585_cast_fp16")];
int32 concat_60_values0_0 = const()[name = string("concat_60_values0_0"), val = int32(0)];
int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)];
bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)];
tensor<int32, [2]> concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (concat_60_values0_0, gather_26_cast_uint16_to_int32))[name = string("concat_60")];
tensor<int32, [2]> var_586_begin_0 = const()[name = string("op_586_begin_0"), val = tensor<int32, [2]>([0, 0])];
tensor<bool, [2]> var_586_end_mask_0 = const()[name = string("op_586_end_mask_0"), val = tensor<bool, [2]>([true, false])];
tensor<fp16, [?, ?]> var_586_cast_fp16 = slice_by_index(begin = var_586_begin_0, end = concat_60, end_mask = var_586_end_mask_0, x = var_585_cast_fp16)[name = string("op_586_cast_fp16")];
tensor<fp16, [1, 8, ?, ?]> qk_15_cast_fp16 = add(x = qk_13_cast_fp16, y = var_586_cast_fp16)[name = string("qk_15_cast_fp16")];
tensor<fp16, [1, 8, ?, ?]> var_589_cast_fp16 = softmax(axis = var_498, x = qk_15_cast_fp16)[name = string("op_589_cast_fp16")];
bool var_591_transpose_x_0 = const()[name = string("op_591_transpose_x_0"), val = bool(false)];
bool var_591_transpose_y_0 = const()[name = string("op_591_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 8, ?, 64]> v_25_cast_fp16 = transpose(perm = var_582, x = var_581_cast_fp16)[name = string("transpose_104")];
tensor<fp16, [1, 8, ?, 64]> var_591_cast_fp16 = matmul(transpose_x = var_591_transpose_x_0, transpose_y = var_591_transpose_y_0, x = var_589_cast_fp16, y = v_25_cast_fp16)[name = string("op_591_cast_fp16")];
tensor<int32, [4]> var_592 = const()[name = string("op_592"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> concat_61x = const()[name = string("concat_61x"), val = tensor<int32, [3]>([1, -1, 512])];
tensor<fp16, [1, ?, 8, 64]> var_593_cast_fp16 = transpose(perm = var_592, x = var_591_cast_fp16)[name = string("transpose_101")];
tensor<fp16, [1, ?, 512]> x_43_cast_fp16 = reshape(shape = concat_61x, x = var_593_cast_fp16)[name = string("x_43_cast_fp16")];
tensor<fp16, [512, 512]> var_597_to_fp16 = const()[name = string("op_597_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71800256)))];
tensor<fp16, [512]> var_598_to_fp16 = const()[name = string("op_598_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72324608)))];
tensor<fp16, [1, ?, 512]> linear_19_cast_fp16 = linear(bias = var_598_to_fp16, weight = var_597_to_fp16, x = x_43_cast_fp16)[name = string("linear_19_cast_fp16")];
tensor<fp16, [1, ?, 512]> x_45_cast_fp16 = add(x = x_39_cast_fp16, y = linear_19_cast_fp16)[name = string("x_45_cast_fp16")];
tensor<int32, [1]> var_605_axes_0 = const()[name = string("op_605_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> blocks_2_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_2_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72325696)))];
tensor<fp16, [512]> blocks_2_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_2_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72326784)))];
tensor<fp16, [1, ?, 512]> var_605_cast_fp16 = layer_norm(axes = var_605_axes_0, beta = blocks_2_cross_attn_ln_bias_to_fp16, epsilon = var_504_to_fp16, gamma = blocks_2_cross_attn_ln_weight_to_fp16, x = x_45_cast_fp16)[name = string("op_605_cast_fp16")];
tensor<fp16, [512, 512]> var_614_to_fp16 = const()[name = string("op_614_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72327872)))];
tensor<fp16, [512]> var_615_to_fp16 = const()[name = string("op_615_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72852224)))];
tensor<fp16, [1, ?, 512]> linear_20_cast_fp16 = linear(bias = var_615_to_fp16, weight = var_614_to_fp16, x = var_605_cast_fp16)[name = string("linear_20_cast_fp16")];
tensor<int32, [3]> concat_62 = const()[name = string("concat_62"), val = tensor<int32, [3]>([0, 0, 0])];
tensor<int32, [3]> concat_63 = const()[name = string("concat_63"), val = tensor<int32, [3]>([0, 1500, 0])];
tensor<int32, [3]> k_27_internal_tensor_assign_1_stride_0 = const()[name = string("k_27_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
tensor<bool, [3]> k_27_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
tensor<bool, [3]> k_27_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
tensor<bool, [3]> k_27_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
tensor<fp16, [1, 1500, 512]> k_27_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_62, begin_mask = k_27_internal_tensor_assign_1_begin_mask_0, end = concat_63, end_mask = k_27_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_27_internal_tensor_assign_1_squeeze_mask_0, stride = k_27_internal_tensor_assign_1_stride_0, update = k_cache_11_cast_fp16, x = k_7_to_fp16)[name = string("k_27_internal_tensor_assign_1_cast_fp16")];
tensor<int32, [3]> concat_64 = const()[name = string("concat_64"), val = tensor<int32, [3]>([0, 0, 0])];
tensor<int32, [3]> concat_65 = const()[name = string("concat_65"), val = tensor<int32, [3]>([0, 1500, 0])];
tensor<int32, [3]> v_27_internal_tensor_assign_1_stride_0 = const()[name = string("v_27_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
tensor<bool, [3]> v_27_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
tensor<bool, [3]> v_27_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
tensor<bool, [3]> v_27_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
tensor<fp16, [1, 1500, 512]> v_27_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_64, begin_mask = v_27_internal_tensor_assign_1_begin_mask_0, end = concat_65, end_mask = v_27_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_27_internal_tensor_assign_1_squeeze_mask_0, stride = v_27_internal_tensor_assign_1_stride_0, update = v_cache_11_cast_fp16, x = k_7_to_fp16)[name = string("v_27_internal_tensor_assign_1_cast_fp16")];
tensor<int32, [4]> concat_66x = const()[name = string("concat_66x"), val = tensor<int32, [4]>([1, -1, 8, 64])];
tensor<fp16, [1, ?, 8, 64]> var_635_cast_fp16 = reshape(shape = concat_66x, x = linear_20_cast_fp16)[name = string("op_635_cast_fp16")];
tensor<fp16, [1, 1, 1, 1]> const_40_to_fp16 = const()[name = string("const_40_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
tensor<fp16, [1, ?, 8, 64]> q_23_cast_fp16 = mul(x = var_635_cast_fp16, y = const_40_to_fp16)[name = string("q_23_cast_fp16")];
tensor<int32, [4]> var_641 = const()[name = string("op_641"), val = tensor<int32, [4]>([1, 1500, 8, -1])];
tensor<fp16, [1, 1500, 8, 64]> var_642_cast_fp16 = reshape(shape = var_641, x = k_27_internal_tensor_assign_1_cast_fp16)[name = string("op_642_cast_fp16")];
tensor<fp16, [1, 1, 1, 1]> const_41_to_fp16 = const()[name = string("const_41_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
tensor<fp16, [1, 1500, 8, 64]> k_29_cast_fp16 = mul(x = var_642_cast_fp16, y = const_41_to_fp16)[name = string("k_29_cast_fp16")];
tensor<int32, [4]> var_648 = const()[name = string("op_648"), val = tensor<int32, [4]>([1, 1500, 8, -1])];
tensor<fp16, [1, 1500, 8, 64]> var_649_cast_fp16 = reshape(shape = var_648, x = v_27_internal_tensor_assign_1_cast_fp16)[name = string("op_649_cast_fp16")];
tensor<int32, [4]> var_650 = const()[name = string("op_650"), val = tensor<int32, [4]>([0, 2, 1, 3])];
bool qk_17_transpose_x_0 = const()[name = string("qk_17_transpose_x_0"), val = bool(false)];
bool qk_17_transpose_y_0 = const()[name = string("qk_17_transpose_y_0"), val = bool(false)];
tensor<int32, [4]> transpose_59_perm_0 = const()[name = string("transpose_59_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_60_perm_0 = const()[name = string("transpose_60_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 8, 64, 1500]> transpose_60 = transpose(perm = transpose_60_perm_0, x = k_29_cast_fp16)[name = string("transpose_98")];
tensor<fp16, [1, 8, ?, 64]> transpose_59 = transpose(perm = transpose_59_perm_0, x = q_23_cast_fp16)[name = string("transpose_99")];
tensor<fp16, [1, 8, ?, 1500]> qk_17_cast_fp16 = matmul(transpose_x = qk_17_transpose_x_0, transpose_y = qk_17_transpose_y_0, x = transpose_59, y = transpose_60)[name = string("qk_17_cast_fp16")];
tensor<fp16, [1, 8, ?, 1500]> var_654_cast_fp16 = softmax(axis = var_498, x = qk_17_cast_fp16)[name = string("op_654_cast_fp16")];
bool var_656_transpose_x_0 = const()[name = string("op_656_transpose_x_0"), val = bool(false)];
bool var_656_transpose_y_0 = const()[name = string("op_656_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 8, 1500, 64]> v_29_cast_fp16 = transpose(perm = var_650, x = var_649_cast_fp16)[name = string("transpose_100")];
tensor<fp16, [1, 8, ?, 64]> var_656_cast_fp16 = matmul(transpose_x = var_656_transpose_x_0, transpose_y = var_656_transpose_y_0, x = var_654_cast_fp16, y = v_29_cast_fp16)[name = string("op_656_cast_fp16")];
tensor<int32, [4]> var_657 = const()[name = string("op_657"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> concat_67x = const()[name = string("concat_67x"), val = tensor<int32, [3]>([1, -1, 512])];
tensor<fp16, [1, ?, 8, 64]> var_658_cast_fp16 = transpose(perm = var_657, x = var_656_cast_fp16)[name = string("transpose_97")];
tensor<fp16, [1, ?, 512]> x_49_cast_fp16 = reshape(shape = concat_67x, x = var_658_cast_fp16)[name = string("x_49_cast_fp16")];
tensor<fp16, [512, 512]> var_662_to_fp16 = const()[name = string("op_662_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72853312)))];
tensor<fp16, [512]> var_663_to_fp16 = const()[name = string("op_663_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73377664)))];
tensor<fp16, [1, ?, 512]> linear_21_cast_fp16 = linear(bias = var_663_to_fp16, weight = var_662_to_fp16, x = x_49_cast_fp16)[name = string("linear_21_cast_fp16")];
tensor<fp16, [1, ?, 512]> x_51_cast_fp16 = add(x = x_45_cast_fp16, y = linear_21_cast_fp16)[name = string("x_51_cast_fp16")];
tensor<int32, [1]> var_670_axes_0 = const()[name = string("op_670_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> blocks_2_mlp_ln_weight_to_fp16 = const()[name = string("blocks_2_mlp_ln_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73378752)))];
tensor<fp16, [512]> blocks_2_mlp_ln_bias_to_fp16 = const()[name = string("blocks_2_mlp_ln_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73379840)))];
tensor<fp16, [1, ?, 512]> var_670_cast_fp16 = layer_norm(axes = var_670_axes_0, beta = blocks_2_mlp_ln_bias_to_fp16, epsilon = var_504_to_fp16, gamma = blocks_2_mlp_ln_weight_to_fp16, x = x_51_cast_fp16)[name = string("op_670_cast_fp16")];
tensor<fp16, [2048, 512]> var_679_to_fp16 = const()[name = string("op_679_to_fp16"), val = tensor<fp16, [2048, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73380928)))];
tensor<fp16, [2048]> var_680_to_fp16 = const()[name = string("op_680_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75478144)))];
tensor<fp16, [1, ?, 2048]> linear_22_cast_fp16 = linear(bias = var_680_to_fp16, weight = var_679_to_fp16, x = var_670_cast_fp16)[name = string("linear_22_cast_fp16")];
string x_55_mode_0 = const()[name = string("x_55_mode_0"), val = string("EXACT")];
tensor<fp16, [1, ?, 2048]> x_55_cast_fp16 = gelu(mode = x_55_mode_0, x = linear_22_cast_fp16)[name = string("x_55_cast_fp16")];
tensor<fp16, [512, 2048]> var_685_to_fp16 = const()[name = string("op_685_to_fp16"), val = tensor<fp16, [512, 2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75482304)))];
tensor<fp16, [512]> var_686_to_fp16 = const()[name = string("op_686_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77579520)))];
tensor<fp16, [1, ?, 512]> linear_23_cast_fp16 = linear(bias = var_686_to_fp16, weight = var_685_to_fp16, x = x_55_cast_fp16)[name = string("linear_23_cast_fp16")];
tensor<fp16, [1, ?, 512]> x_57_cast_fp16 = add(x = x_51_cast_fp16, y = linear_23_cast_fp16)[name = string("x_57_cast_fp16")];
tensor<int32, [4]> k_cache_13_begin_0 = const()[name = string("k_cache_13_begin_0"), val = tensor<int32, [4]>([3, 0, 0, 0])];
tensor<int32, [4]> k_cache_13_end_0 = const()[name = string("k_cache_13_end_0"), val = tensor<int32, [4]>([4, 1, 448, 512])];
tensor<bool, [4]> k_cache_13_end_mask_0 = const()[name = string("k_cache_13_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<bool, [4]> k_cache_13_squeeze_mask_0 = const()[name = string("k_cache_13_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
tensor<fp16, [1, 448, 512]> k_cache_13_cast_fp16 = slice_by_index(begin = k_cache_13_begin_0, end = k_cache_13_end_0, end_mask = k_cache_13_end_mask_0, squeeze_mask = k_cache_13_squeeze_mask_0, x = coreml_update_state_16)[name = string("k_cache_13_cast_fp16")];
tensor<int32, [4]> v_cache_13_begin_0 = const()[name = string("v_cache_13_begin_0"), val = tensor<int32, [4]>([3, 0, 0, 0])];
tensor<int32, [4]> v_cache_13_end_0 = const()[name = string("v_cache_13_end_0"), val = tensor<int32, [4]>([4, 1, 448, 512])];
tensor<bool, [4]> v_cache_13_end_mask_0 = const()[name = string("v_cache_13_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<bool, [4]> v_cache_13_squeeze_mask_0 = const()[name = string("v_cache_13_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
tensor<fp16, [1, 448, 512]> v_cache_13_cast_fp16 = slice_by_index(begin = v_cache_13_begin_0, end = v_cache_13_end_0, end_mask = v_cache_13_end_mask_0, squeeze_mask = v_cache_13_squeeze_mask_0, x = coreml_update_state_17)[name = string("v_cache_13_cast_fp16")];
tensor<int32, [4]> k_cache_15_begin_0 = const()[name = string("k_cache_15_begin_0"), val = tensor<int32, [4]>([3, 0, 0, 0])];
tensor<int32, [4]> k_cache_15_end_0 = const()[name = string("k_cache_15_end_0"), val = tensor<int32, [4]>([4, 1, 1500, 512])];
tensor<bool, [4]> k_cache_15_end_mask_0 = const()[name = string("k_cache_15_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<bool, [4]> k_cache_15_squeeze_mask_0 = const()[name = string("k_cache_15_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
tensor<fp16, [1, 1500, 512]> k_cache_15_cast_fp16 = slice_by_index(begin = k_cache_15_begin_0, end = k_cache_15_end_0, end_mask = k_cache_15_end_mask_0, squeeze_mask = k_cache_15_squeeze_mask_0, x = read_state_2)[name = string("k_cache_15_cast_fp16")];
tensor<int32, [4]> v_cache_15_begin_0 = const()[name = string("v_cache_15_begin_0"), val = tensor<int32, [4]>([3, 0, 0, 0])];
tensor<int32, [4]> v_cache_15_end_0 = const()[name = string("v_cache_15_end_0"), val = tensor<int32, [4]>([4, 1, 1500, 512])];
tensor<bool, [4]> v_cache_15_end_mask_0 = const()[name = string("v_cache_15_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<bool, [4]> v_cache_15_squeeze_mask_0 = const()[name = string("v_cache_15_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
tensor<fp16, [1, 1500, 512]> v_cache_15_cast_fp16 = slice_by_index(begin = v_cache_15_begin_0, end = v_cache_15_end_0, end_mask = v_cache_15_end_mask_0, squeeze_mask = v_cache_15_squeeze_mask_0, x = read_state_3)[name = string("v_cache_15_cast_fp16")];
int32 var_709 = const()[name = string("op_709"), val = int32(-1)];
tensor<int32, [1]> var_727_axes_0 = const()[name = string("op_727_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> blocks_3_attn_ln_weight_to_fp16 = const()[name = string("blocks_3_attn_ln_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77580608)))];
tensor<fp16, [512]> blocks_3_attn_ln_bias_to_fp16 = const()[name = string("blocks_3_attn_ln_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77581696)))];
fp16 var_715_to_fp16 = const()[name = string("op_715_to_fp16"), val = fp16(0x1.5p-17)];
tensor<fp16, [1, ?, 512]> var_727_cast_fp16 = layer_norm(axes = var_727_axes_0, beta = blocks_3_attn_ln_bias_to_fp16, epsilon = var_715_to_fp16, gamma = blocks_3_attn_ln_weight_to_fp16, x = x_57_cast_fp16)[name = string("op_727_cast_fp16")];
tensor<fp16, [512, 512]> var_738_to_fp16 = const()[name = string("op_738_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77582784)))];
tensor<fp16, [512]> var_739_to_fp16 = const()[name = string("op_739_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78107136)))];
tensor<fp16, [1, ?, 512]> linear_24_cast_fp16 = linear(bias = var_739_to_fp16, weight = var_738_to_fp16, x = var_727_cast_fp16)[name = string("linear_24_cast_fp16")];
tensor<fp16, [512, 512]> var_742_to_fp16 = const()[name = string("op_742_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78108224)))];
tensor<fp16, [1, ?, 512]> linear_25_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_742_to_fp16, x = var_727_cast_fp16)[name = string("linear_25_cast_fp16")];
tensor<fp16, [512, 512]> var_746_to_fp16 = const()[name = string("op_746_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78632576)))];
tensor<fp16, [512]> var_747_to_fp16 = const()[name = string("op_747_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79156928)))];
tensor<fp16, [1, ?, 512]> linear_26_cast_fp16 = linear(bias = var_747_to_fp16, weight = var_746_to_fp16, x = var_727_cast_fp16)[name = string("linear_26_cast_fp16")];
tensor<int32, [3]> var_749_shape_cast_fp16 = shape(x = linear_24_cast_fp16)[name = string("op_749_shape_cast_fp16")];
int32 gather_38_axis_0 = const()[name = string("gather_38_axis_0"), val = int32(0)];
int32 gather_38_batch_dims_0 = const()[name = string("gather_38_batch_dims_0"), val = int32(0)];
bool gather_38_validate_indices_0 = const()[name = string("gather_38_validate_indices_0"), val = bool(false)];
string var_749_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_749_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
uint16 select_38_to_uint16 = const()[name = string("select_38_to_uint16"), val = uint16(1)];
tensor<uint16, [3]> var_749_shape_cast_fp16_to_uint16 = cast(dtype = var_749_shape_cast_fp16_to_uint16_dtype_0, x = var_749_shape_cast_fp16)[name = string("cast_72")];
uint16 gather_38_cast_uint16 = gather(axis = gather_38_axis_0, batch_dims = gather_38_batch_dims_0, indices = select_38_to_uint16, validate_indices = gather_38_validate_indices_0, x = var_749_shape_cast_fp16_to_uint16)[name = string("gather_38_cast_uint16")];
string gather_38_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_38_cast_uint16_to_int32_dtype_0"), val = string("int32")];
int32 gather_38_cast_uint16_to_int32 = cast(dtype = gather_38_cast_uint16_to_int32_dtype_0, x = gather_38_cast_uint16)[name = string("cast_71")];
int32 end_step_9 = add(x = offset, y = gather_38_cast_uint16_to_int32)[name = string("end_step_9")];
tensor<int32, [1]> expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_50 = const()[name = string("expand_dims_50"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_51_axes_0 = const()[name = string("expand_dims_51_axes_0"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_51 = expand_dims(axes = expand_dims_51_axes_0, x = end_step_9)[name = string("expand_dims_51")];
tensor<int32, [1]> concat_70_values0_0 = const()[name = string("concat_70_values0_0"), val = tensor<int32, [1]>([3])];
int32 concat_70_axis_0 = const()[name = string("concat_70_axis_0"), val = int32(0)];
bool concat_70_interleave_0 = const()[name = string("concat_70_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_70 = concat(axis = concat_70_axis_0, interleave = concat_70_interleave_0, values = (concat_70_values0_0, expand_dims_48, expand_dims_1, expand_dims_50))[name = string("concat_70")];
tensor<int32, [1]> concat_71_values0_0 = const()[name = string("concat_71_values0_0"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> concat_71_values1_0 = const()[name = string("concat_71_values1_0"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> concat_71_values3_0 = const()[name = string("concat_71_values3_0"), val = tensor<int32, [1]>([0])];
int32 concat_71_axis_0 = const()[name = string("concat_71_axis_0"), val = int32(0)];
bool concat_71_interleave_0 = const()[name = string("concat_71_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_71 = concat(axis = concat_71_axis_0, interleave = concat_71_interleave_0, values = (concat_71_values0_0, concat_71_values1_0, expand_dims_51, concat_71_values3_0))[name = string("concat_71")];
tensor<int32, [4]> k_cache1_internal_tensor_assign_4_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_4_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
tensor<bool, [4]> k_cache1_internal_tensor_assign_4_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<bool, [4]> k_cache1_internal_tensor_assign_4_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
tensor<bool, [4]> k_cache1_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
tensor<fp16, [6, 1, 448, 512]> k_cache1_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_70, begin_mask = k_cache1_internal_tensor_assign_4_begin_mask_0, end = concat_71, end_mask = k_cache1_internal_tensor_assign_4_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_4_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_4_stride_0, update = linear_25_cast_fp16, x = coreml_update_state_16)[name = string("k_cache1_internal_tensor_assign_4_cast_fp16")];
write_state(data = k_cache1_internal_tensor_assign_4_cast_fp16, input = k_cache1)[name = string("coreml_update_state_18_write_state")];
tensor<fp16, [6, 1, 448, 512]> coreml_update_state_18 = read_state(input = k_cache1)[name = string("coreml_update_state_18")];
tensor<int32, [4]> v_cache1_internal_tensor_assign_4_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_4_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
tensor<bool, [4]> v_cache1_internal_tensor_assign_4_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<bool, [4]> v_cache1_internal_tensor_assign_4_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
tensor<bool, [4]> v_cache1_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
tensor<fp16, [6, 1, 448, 512]> v_cache1_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_70, begin_mask = v_cache1_internal_tensor_assign_4_begin_mask_0, end = concat_71, end_mask = v_cache1_internal_tensor_assign_4_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_4_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_4_stride_0, update = linear_26_cast_fp16, x = coreml_update_state_17)[name = string("v_cache1_internal_tensor_assign_4_cast_fp16")];
write_state(data = v_cache1_internal_tensor_assign_4_cast_fp16, input = v_cache1)[name = string("coreml_update_state_19_write_state")];
tensor<fp16, [6, 1, 448, 512]> coreml_update_state_19 = read_state(input = v_cache1)[name = string("coreml_update_state_19")];
int32 concat_76_values0_0 = const()[name = string("concat_76_values0_0"), val = int32(1)];
int32 concat_76_values2_0 = const()[name = string("concat_76_values2_0"), val = int32(512)];
int32 concat_76_axis_0 = const()[name = string("concat_76_axis_0"), val = int32(0)];
bool concat_76_interleave_0 = const()[name = string("concat_76_interleave_0"), val = bool(false)];
tensor<int32, [3]> concat_76 = concat(axis = concat_76_axis_0, interleave = concat_76_interleave_0, values = (concat_76_values0_0, end_step_9, concat_76_values2_0))[name = string("concat_76")];
tensor<int32, [3]> var_765_begin_0 = const()[name = string("op_765_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
tensor<bool, [3]> var_765_end_mask_0 = const()[name = string("op_765_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
tensor<fp16, [1, ?, 512]> var_765_cast_fp16 = slice_by_index(begin = var_765_begin_0, end = concat_76, end_mask = var_765_end_mask_0, x = k_cache_13_cast_fp16)[name = string("op_765_cast_fp16")];
tensor<int32, [3]> var_768_begin_0 = const()[name = string("op_768_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
tensor<bool, [3]> var_768_end_mask_0 = const()[name = string("op_768_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
tensor<fp16, [1, ?, 512]> var_768_cast_fp16 = slice_by_index(begin = var_768_begin_0, end = concat_76, end_mask = var_768_end_mask_0, x = v_cache_13_cast_fp16)[name = string("op_768_cast_fp16")];
tensor<int32, [4]> concat_78x = const()[name = string("concat_78x"), val = tensor<int32, [4]>([1, -1, 8, 64])];
tensor<fp16, [1, ?, 8, 64]> var_778_cast_fp16 = reshape(shape = concat_78x, x = linear_24_cast_fp16)[name = string("op_778_cast_fp16")];
tensor<fp16, [1, 1, 1, 1]> const_42_to_fp16 = const()[name = string("const_42_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
tensor<fp16, [1, ?, 8, 64]> q_27_cast_fp16 = mul(x = var_778_cast_fp16, y = const_42_to_fp16)[name = string("q_27_cast_fp16")];
tensor<int32, [4]> concat_79x = const()[name = string("concat_79x"), val = tensor<int32, [4]>([1, -1, 8, 64])];
tensor<fp16, [1, ?, 8, 64]> var_785_cast_fp16 = reshape(shape = concat_79x, x = var_765_cast_fp16)[name = string("op_785_cast_fp16")];
tensor<fp16, [1, 1, 1, 1]> const_43_to_fp16 = const()[name = string("const_43_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
tensor<fp16, [1, ?, 8, 64]> k_35_cast_fp16 = mul(x = var_785_cast_fp16, y = const_43_to_fp16)[name = string("k_35_cast_fp16")];
tensor<int32, [4]> concat_80x = const()[name = string("concat_80x"), val = tensor<int32, [4]>([1, -1, 8, 64])];
tensor<fp16, [1, ?, 8, 64]> var_792_cast_fp16 = reshape(shape = concat_80x, x = var_768_cast_fp16)[name = string("op_792_cast_fp16")];
tensor<int32, [4]> var_793 = const()[name = string("op_793"), val = tensor<int32, [4]>([0, 2, 1, 3])];
bool qk_19_transpose_x_0 = const()[name = string("qk_19_transpose_x_0"), val = bool(false)];
bool qk_19_transpose_y_0 = const()[name = string("qk_19_transpose_y_0"), val = bool(false)];
tensor<int32, [4]> transpose_61_perm_0 = const()[name = string("transpose_61_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_62_perm_0 = const()[name = string("transpose_62_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 8, 64, ?]> transpose_62 = transpose(perm = transpose_62_perm_0, x = k_35_cast_fp16)[name = string("transpose_94")];
tensor<fp16, [1, 8, ?, 64]> transpose_61 = transpose(perm = transpose_61_perm_0, x = q_27_cast_fp16)[name = string("transpose_95")];
tensor<fp16, [1, 8, ?, ?]> qk_19_cast_fp16 = matmul(transpose_x = qk_19_transpose_x_0, transpose_y = qk_19_transpose_y_0, x = transpose_61, y = transpose_62)[name = string("qk_19_cast_fp16")];
int32 concat_81_values1_0 = const()[name = string("concat_81_values1_0"), val = int32(448)];
int32 concat_81_axis_0 = const()[name = string("concat_81_axis_0"), val = int32(0)];
bool concat_81_interleave_0 = const()[name = string("concat_81_interleave_0"), val = bool(false)];
tensor<int32, [2]> concat_81 = concat(axis = concat_81_axis_0, interleave = concat_81_interleave_0, values = (gather_38_cast_uint16_to_int32, concat_81_values1_0))[name = string("concat_81")];
tensor<int32, [2]> var_796_begin_0 = const()[name = string("op_796_begin_0"), val = tensor<int32, [2]>([0, 0])];
tensor<bool, [2]> var_796_end_mask_0 = const()[name = string("op_796_end_mask_0"), val = tensor<bool, [2]>([false, true])];
tensor<fp16, [?, 448]> var_796_cast_fp16 = slice_by_index(begin = var_796_begin_0, end = concat_81, end_mask = var_796_end_mask_0, x = mask_to_fp16)[name = string("op_796_cast_fp16")];
int32 concat_82_values0_0 = const()[name = string("concat_82_values0_0"), val = int32(0)];
int32 concat_82_axis_0 = const()[name = string("concat_82_axis_0"), val = int32(0)];
bool concat_82_interleave_0 = const()[name = string("concat_82_interleave_0"), val = bool(false)];
tensor<int32, [2]> concat_82 = concat(axis = concat_82_axis_0, interleave = concat_82_interleave_0, values = (concat_82_values0_0, gather_38_cast_uint16_to_int32))[name = string("concat_82")];
tensor<int32, [2]> var_797_begin_0 = const()[name = string("op_797_begin_0"), val = tensor<int32, [2]>([0, 0])];
tensor<bool, [2]> var_797_end_mask_0 = const()[name = string("op_797_end_mask_0"), val = tensor<bool, [2]>([true, false])];
tensor<fp16, [?, ?]> var_797_cast_fp16 = slice_by_index(begin = var_797_begin_0, end = concat_82, end_mask = var_797_end_mask_0, x = var_796_cast_fp16)[name = string("op_797_cast_fp16")];
tensor<fp16, [1, 8, ?, ?]> qk_21_cast_fp16 = add(x = qk_19_cast_fp16, y = var_797_cast_fp16)[name = string("qk_21_cast_fp16")];
tensor<fp16, [1, 8, ?, ?]> var_800_cast_fp16 = softmax(axis = var_709, x = qk_21_cast_fp16)[name = string("op_800_cast_fp16")];
bool var_802_transpose_x_0 = const()[name = string("op_802_transpose_x_0"), val = bool(false)];
bool var_802_transpose_y_0 = const()[name = string("op_802_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 8, ?, 64]> v_35_cast_fp16 = transpose(perm = var_793, x = var_792_cast_fp16)[name = string("transpose_96")];
tensor<fp16, [1, 8, ?, 64]> var_802_cast_fp16 = matmul(transpose_x = var_802_transpose_x_0, transpose_y = var_802_transpose_y_0, x = var_800_cast_fp16, y = v_35_cast_fp16)[name = string("op_802_cast_fp16")];
tensor<int32, [4]> var_803 = const()[name = string("op_803"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> concat_83x = const()[name = string("concat_83x"), val = tensor<int32, [3]>([1, -1, 512])];
tensor<fp16, [1, ?, 8, 64]> var_804_cast_fp16 = transpose(perm = var_803, x = var_802_cast_fp16)[name = string("transpose_93")];
tensor<fp16, [1, ?, 512]> x_61_cast_fp16 = reshape(shape = concat_83x, x = var_804_cast_fp16)[name = string("x_61_cast_fp16")];
tensor<fp16, [512, 512]> var_808_to_fp16 = const()[name = string("op_808_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79158016)))];
tensor<fp16, [512]> var_809_to_fp16 = const()[name = string("op_809_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79682368)))];
tensor<fp16, [1, ?, 512]> linear_27_cast_fp16 = linear(bias = var_809_to_fp16, weight = var_808_to_fp16, x = x_61_cast_fp16)[name = string("linear_27_cast_fp16")];
tensor<fp16, [1, ?, 512]> x_63_cast_fp16 = add(x = x_57_cast_fp16, y = linear_27_cast_fp16)[name = string("x_63_cast_fp16")];
tensor<int32, [1]> var_816_axes_0 = const()[name = string("op_816_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> blocks_3_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_3_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79683456)))];
tensor<fp16, [512]> blocks_3_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_3_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79684544)))];
tensor<fp16, [1, ?, 512]> var_816_cast_fp16 = layer_norm(axes = var_816_axes_0, beta = blocks_3_cross_attn_ln_bias_to_fp16, epsilon = var_715_to_fp16, gamma = blocks_3_cross_attn_ln_weight_to_fp16, x = x_63_cast_fp16)[name = string("op_816_cast_fp16")];
tensor<fp16, [512, 512]> var_825_to_fp16 = const()[name = string("op_825_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79685632)))];
tensor<fp16, [512]> var_826_to_fp16 = const()[name = string("op_826_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80209984)))];
tensor<fp16, [1, ?, 512]> linear_28_cast_fp16 = linear(bias = var_826_to_fp16, weight = var_825_to_fp16, x = var_816_cast_fp16)[name = string("linear_28_cast_fp16")];
tensor<int32, [3]> concat_84 = const()[name = string("concat_84"), val = tensor<int32, [3]>([0, 0, 0])];
tensor<int32, [3]> concat_85 = const()[name = string("concat_85"), val = tensor<int32, [3]>([0, 1500, 0])];
tensor<int32, [3]> k_37_internal_tensor_assign_1_stride_0 = const()[name = string("k_37_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
tensor<bool, [3]> k_37_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
tensor<bool, [3]> k_37_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
tensor<bool, [3]> k_37_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
tensor<fp16, [1, 1500, 512]> k_37_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_84, begin_mask = k_37_internal_tensor_assign_1_begin_mask_0, end = concat_85, end_mask = k_37_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_37_internal_tensor_assign_1_squeeze_mask_0, stride = k_37_internal_tensor_assign_1_stride_0, update = k_cache_15_cast_fp16, x = k_7_to_fp16)[name = string("k_37_internal_tensor_assign_1_cast_fp16")];
tensor<int32, [3]> concat_86 = const()[name = string("concat_86"), val = tensor<int32, [3]>([0, 0, 0])];
tensor<int32, [3]> concat_87 = const()[name = string("concat_87"), val = tensor<int32, [3]>([0, 1500, 0])];
tensor<int32, [3]> v_37_internal_tensor_assign_1_stride_0 = const()[name = string("v_37_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
tensor<bool, [3]> v_37_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
tensor<bool, [3]> v_37_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
tensor<bool, [3]> v_37_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
tensor<fp16, [1, 1500, 512]> v_37_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_86, begin_mask = v_37_internal_tensor_assign_1_begin_mask_0, end = concat_87, end_mask = v_37_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_37_internal_tensor_assign_1_squeeze_mask_0, stride = v_37_internal_tensor_assign_1_stride_0, update = v_cache_15_cast_fp16, x = k_7_to_fp16)[name = string("v_37_internal_tensor_assign_1_cast_fp16")];
tensor<int32, [4]> concat_88x = const()[name = string("concat_88x"), val = tensor<int32, [4]>([1, -1, 8, 64])];
tensor<fp16, [1, ?, 8, 64]> var_846_cast_fp16 = reshape(shape = concat_88x, x = linear_28_cast_fp16)[name = string("op_846_cast_fp16")];
tensor<fp16, [1, 1, 1, 1]> const_44_to_fp16 = const()[name = string("const_44_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
tensor<fp16, [1, ?, 8, 64]> q_31_cast_fp16 = mul(x = var_846_cast_fp16, y = const_44_to_fp16)[name = string("q_31_cast_fp16")];
tensor<int32, [4]> var_852 = const()[name = string("op_852"), val = tensor<int32, [4]>([1, 1500, 8, -1])];
tensor<fp16, [1, 1500, 8, 64]> var_853_cast_fp16 = reshape(shape = var_852, x = k_37_internal_tensor_assign_1_cast_fp16)[name = string("op_853_cast_fp16")];
tensor<fp16, [1, 1, 1, 1]> const_45_to_fp16 = const()[name = string("const_45_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
tensor<fp16, [1, 1500, 8, 64]> k_39_cast_fp16 = mul(x = var_853_cast_fp16, y = const_45_to_fp16)[name = string("k_39_cast_fp16")];
tensor<int32, [4]> var_859 = const()[name = string("op_859"), val = tensor<int32, [4]>([1, 1500, 8, -1])];
tensor<fp16, [1, 1500, 8, 64]> var_860_cast_fp16 = reshape(shape = var_859, x = v_37_internal_tensor_assign_1_cast_fp16)[name = string("op_860_cast_fp16")];
tensor<int32, [4]> var_861 = const()[name = string("op_861"), val = tensor<int32, [4]>([0, 2, 1, 3])];
bool qk_23_transpose_x_0 = const()[name = string("qk_23_transpose_x_0"), val = bool(false)];
bool qk_23_transpose_y_0 = const()[name = string("qk_23_transpose_y_0"), val = bool(false)];
tensor<int32, [4]> transpose_63_perm_0 = const()[name = string("transpose_63_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_64_perm_0 = const()[name = string("transpose_64_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 8, 64, 1500]> transpose_64 = transpose(perm = transpose_64_perm_0, x = k_39_cast_fp16)[name = string("transpose_90")];
tensor<fp16, [1, 8, ?, 64]> transpose_63 = transpose(perm = transpose_63_perm_0, x = q_31_cast_fp16)[name = string("transpose_91")];
tensor<fp16, [1, 8, ?, 1500]> qk_23_cast_fp16 = matmul(transpose_x = qk_23_transpose_x_0, transpose_y = qk_23_transpose_y_0, x = transpose_63, y = transpose_64)[name = string("qk_23_cast_fp16")];
tensor<fp16, [1, 8, ?, 1500]> var_865_cast_fp16 = softmax(axis = var_709, x = qk_23_cast_fp16)[name = string("op_865_cast_fp16")];
bool var_867_transpose_x_0 = const()[name = string("op_867_transpose_x_0"), val = bool(false)];
bool var_867_transpose_y_0 = const()[name = string("op_867_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 8, 1500, 64]> v_39_cast_fp16 = transpose(perm = var_861, x = var_860_cast_fp16)[name = string("transpose_92")];
tensor<fp16, [1, 8, ?, 64]> var_867_cast_fp16 = matmul(transpose_x = var_867_transpose_x_0, transpose_y = var_867_transpose_y_0, x = var_865_cast_fp16, y = v_39_cast_fp16)[name = string("op_867_cast_fp16")];
tensor<int32, [4]> var_868 = const()[name = string("op_868"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> concat_89x = const()[name = string("concat_89x"), val = tensor<int32, [3]>([1, -1, 512])];
tensor<fp16, [1, ?, 8, 64]> var_869_cast_fp16 = transpose(perm = var_868, x = var_867_cast_fp16)[name = string("transpose_89")];
tensor<fp16, [1, ?, 512]> x_67_cast_fp16 = reshape(shape = concat_89x, x = var_869_cast_fp16)[name = string("x_67_cast_fp16")];
tensor<fp16, [512, 512]> var_873_to_fp16 = const()[name = string("op_873_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80211072)))];
tensor<fp16, [512]> var_874_to_fp16 = const()[name = string("op_874_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80735424)))];
tensor<fp16, [1, ?, 512]> linear_29_cast_fp16 = linear(bias = var_874_to_fp16, weight = var_873_to_fp16, x = x_67_cast_fp16)[name = string("linear_29_cast_fp16")];
tensor<fp16, [1, ?, 512]> x_69_cast_fp16 = add(x = x_63_cast_fp16, y = linear_29_cast_fp16)[name = string("x_69_cast_fp16")];
tensor<int32, [1]> var_881_axes_0 = const()[name = string("op_881_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> blocks_3_mlp_ln_weight_to_fp16 = const()[name = string("blocks_3_mlp_ln_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80736512)))];
tensor<fp16, [512]> blocks_3_mlp_ln_bias_to_fp16 = const()[name = string("blocks_3_mlp_ln_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80737600)))];
tensor<fp16, [1, ?, 512]> var_881_cast_fp16 = layer_norm(axes = var_881_axes_0, beta = blocks_3_mlp_ln_bias_to_fp16, epsilon = var_715_to_fp16, gamma = blocks_3_mlp_ln_weight_to_fp16, x = x_69_cast_fp16)[name = string("op_881_cast_fp16")];
tensor<fp16, [2048, 512]> var_890_to_fp16 = const()[name = string("op_890_to_fp16"), val = tensor<fp16, [2048, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80738688)))];
tensor<fp16, [2048]> var_891_to_fp16 = const()[name = string("op_891_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82835904)))];
tensor<fp16, [1, ?, 2048]> linear_30_cast_fp16 = linear(bias = var_891_to_fp16, weight = var_890_to_fp16, x = var_881_cast_fp16)[name = string("linear_30_cast_fp16")];
string x_73_mode_0 = const()[name = string("x_73_mode_0"), val = string("EXACT")];
tensor<fp16, [1, ?, 2048]> x_73_cast_fp16 = gelu(mode = x_73_mode_0, x = linear_30_cast_fp16)[name = string("x_73_cast_fp16")];
tensor<fp16, [512, 2048]> var_896_to_fp16 = const()[name = string("op_896_to_fp16"), val = tensor<fp16, [512, 2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82840064)))];
tensor<fp16, [512]> var_897_to_fp16 = const()[name = string("op_897_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84937280)))];
tensor<fp16, [1, ?, 512]> linear_31_cast_fp16 = linear(bias = var_897_to_fp16, weight = var_896_to_fp16, x = x_73_cast_fp16)[name = string("linear_31_cast_fp16")];
tensor<fp16, [1, ?, 512]> x_75_cast_fp16 = add(x = x_69_cast_fp16, y = linear_31_cast_fp16)[name = string("x_75_cast_fp16")];
tensor<int32, [4]> k_cache_17_begin_0 = const()[name = string("k_cache_17_begin_0"), val = tensor<int32, [4]>([4, 0, 0, 0])];
tensor<int32, [4]> k_cache_17_end_0 = const()[name = string("k_cache_17_end_0"), val = tensor<int32, [4]>([5, 1, 448, 512])];
tensor<bool, [4]> k_cache_17_end_mask_0 = const()[name = string("k_cache_17_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<bool, [4]> k_cache_17_squeeze_mask_0 = const()[name = string("k_cache_17_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
tensor<fp16, [1, 448, 512]> k_cache_17_cast_fp16 = slice_by_index(begin = k_cache_17_begin_0, end = k_cache_17_end_0, end_mask = k_cache_17_end_mask_0, squeeze_mask = k_cache_17_squeeze_mask_0, x = coreml_update_state_18)[name = string("k_cache_17_cast_fp16")];
tensor<int32, [4]> v_cache_17_begin_0 = const()[name = string("v_cache_17_begin_0"), val = tensor<int32, [4]>([4, 0, 0, 0])];
tensor<int32, [4]> v_cache_17_end_0 = const()[name = string("v_cache_17_end_0"), val = tensor<int32, [4]>([5, 1, 448, 512])];
tensor<bool, [4]> v_cache_17_end_mask_0 = const()[name = string("v_cache_17_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<bool, [4]> v_cache_17_squeeze_mask_0 = const()[name = string("v_cache_17_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
tensor<fp16, [1, 448, 512]> v_cache_17_cast_fp16 = slice_by_index(begin = v_cache_17_begin_0, end = v_cache_17_end_0, end_mask = v_cache_17_end_mask_0, squeeze_mask = v_cache_17_squeeze_mask_0, x = coreml_update_state_19)[name = string("v_cache_17_cast_fp16")];
tensor<int32, [4]> k_cache_19_begin_0 = const()[name = string("k_cache_19_begin_0"), val = tensor<int32, [4]>([4, 0, 0, 0])];
tensor<int32, [4]> k_cache_19_end_0 = const()[name = string("k_cache_19_end_0"), val = tensor<int32, [4]>([5, 1, 1500, 512])];
tensor<bool, [4]> k_cache_19_end_mask_0 = const()[name = string("k_cache_19_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<bool, [4]> k_cache_19_squeeze_mask_0 = const()[name = string("k_cache_19_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
tensor<fp16, [1, 1500, 512]> k_cache_19_cast_fp16 = slice_by_index(begin = k_cache_19_begin_0, end = k_cache_19_end_0, end_mask = k_cache_19_end_mask_0, squeeze_mask = k_cache_19_squeeze_mask_0, x = read_state_2)[name = string("k_cache_19_cast_fp16")];
tensor<int32, [4]> v_cache_19_begin_0 = const()[name = string("v_cache_19_begin_0"), val = tensor<int32, [4]>([4, 0, 0, 0])];
tensor<int32, [4]> v_cache_19_end_0 = const()[name = string("v_cache_19_end_0"), val = tensor<int32, [4]>([5, 1, 1500, 512])];
tensor<bool, [4]> v_cache_19_end_mask_0 = const()[name = string("v_cache_19_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<bool, [4]> v_cache_19_squeeze_mask_0 = const()[name = string("v_cache_19_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
tensor<fp16, [1, 1500, 512]> v_cache_19_cast_fp16 = slice_by_index(begin = v_cache_19_begin_0, end = v_cache_19_end_0, end_mask = v_cache_19_end_mask_0, squeeze_mask = v_cache_19_squeeze_mask_0, x = read_state_3)[name = string("v_cache_19_cast_fp16")];
int32 var_920 = const()[name = string("op_920"), val = int32(-1)];
tensor<int32, [1]> var_938_axes_0 = const()[name = string("op_938_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> blocks_4_attn_ln_weight_to_fp16 = const()[name = string("blocks_4_attn_ln_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84938368)))];
tensor<fp16, [512]> blocks_4_attn_ln_bias_to_fp16 = const()[name = string("blocks_4_attn_ln_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84939456)))];
fp16 var_926_to_fp16 = const()[name = string("op_926_to_fp16"), val = fp16(0x1.5p-17)];
tensor<fp16, [1, ?, 512]> var_938_cast_fp16 = layer_norm(axes = var_938_axes_0, beta = blocks_4_attn_ln_bias_to_fp16, epsilon = var_926_to_fp16, gamma = blocks_4_attn_ln_weight_to_fp16, x = x_75_cast_fp16)[name = string("op_938_cast_fp16")];
tensor<fp16, [512, 512]> var_949_to_fp16 = const()[name = string("op_949_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84940544)))];
tensor<fp16, [512]> var_950_to_fp16 = const()[name = string("op_950_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85464896)))];
tensor<fp16, [1, ?, 512]> linear_32_cast_fp16 = linear(bias = var_950_to_fp16, weight = var_949_to_fp16, x = var_938_cast_fp16)[name = string("linear_32_cast_fp16")];
tensor<fp16, [512, 512]> var_953_to_fp16 = const()[name = string("op_953_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85465984)))];
tensor<fp16, [1, ?, 512]> linear_33_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_953_to_fp16, x = var_938_cast_fp16)[name = string("linear_33_cast_fp16")];
tensor<fp16, [512, 512]> var_957_to_fp16 = const()[name = string("op_957_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85990336)))];
tensor<fp16, [512]> var_958_to_fp16 = const()[name = string("op_958_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86514688)))];
tensor<fp16, [1, ?, 512]> linear_34_cast_fp16 = linear(bias = var_958_to_fp16, weight = var_957_to_fp16, x = var_938_cast_fp16)[name = string("linear_34_cast_fp16")];
tensor<int32, [3]> var_960_shape_cast_fp16 = shape(x = linear_32_cast_fp16)[name = string("op_960_shape_cast_fp16")];
int32 gather_50_axis_0 = const()[name = string("gather_50_axis_0"), val = int32(0)];
int32 gather_50_batch_dims_0 = const()[name = string("gather_50_batch_dims_0"), val = int32(0)];
bool gather_50_validate_indices_0 = const()[name = string("gather_50_validate_indices_0"), val = bool(false)];
string var_960_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_960_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
uint16 select_50_to_uint16 = const()[name = string("select_50_to_uint16"), val = uint16(1)];
tensor<uint16, [3]> var_960_shape_cast_fp16_to_uint16 = cast(dtype = var_960_shape_cast_fp16_to_uint16_dtype_0, x = var_960_shape_cast_fp16)[name = string("cast_70")];
uint16 gather_50_cast_uint16 = gather(axis = gather_50_axis_0, batch_dims = gather_50_batch_dims_0, indices = select_50_to_uint16, validate_indices = gather_50_validate_indices_0, x = var_960_shape_cast_fp16_to_uint16)[name = string("gather_50_cast_uint16")];
string gather_50_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_50_cast_uint16_to_int32_dtype_0"), val = string("int32")];
int32 gather_50_cast_uint16_to_int32 = cast(dtype = gather_50_cast_uint16_to_int32_dtype_0, x = gather_50_cast_uint16)[name = string("cast_69")];
int32 end_step_11 = add(x = offset, y = gather_50_cast_uint16_to_int32)[name = string("end_step_11")];
tensor<int32, [1]> expand_dims_64 = const()[name = string("expand_dims_64"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_66 = const()[name = string("expand_dims_66"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_67_axes_0 = const()[name = string("expand_dims_67_axes_0"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_67 = expand_dims(axes = expand_dims_67_axes_0, x = end_step_11)[name = string("expand_dims_67")];
tensor<int32, [1]> concat_92_values0_0 = const()[name = string("concat_92_values0_0"), val = tensor<int32, [1]>([4])];
int32 concat_92_axis_0 = const()[name = string("concat_92_axis_0"), val = int32(0)];
bool concat_92_interleave_0 = const()[name = string("concat_92_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_92 = concat(axis = concat_92_axis_0, interleave = concat_92_interleave_0, values = (concat_92_values0_0, expand_dims_64, expand_dims_1, expand_dims_66))[name = string("concat_92")];
tensor<int32, [1]> concat_93_values0_0 = const()[name = string("concat_93_values0_0"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> concat_93_values1_0 = const()[name = string("concat_93_values1_0"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> concat_93_values3_0 = const()[name = string("concat_93_values3_0"), val = tensor<int32, [1]>([0])];
int32 concat_93_axis_0 = const()[name = string("concat_93_axis_0"), val = int32(0)];
bool concat_93_interleave_0 = const()[name = string("concat_93_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_93 = concat(axis = concat_93_axis_0, interleave = concat_93_interleave_0, values = (concat_93_values0_0, concat_93_values1_0, expand_dims_67, concat_93_values3_0))[name = string("concat_93")];
tensor<int32, [4]> k_cache1_internal_tensor_assign_5_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_5_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
tensor<bool, [4]> k_cache1_internal_tensor_assign_5_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_5_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<bool, [4]> k_cache1_internal_tensor_assign_5_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_5_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
tensor<bool, [4]> k_cache1_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_5_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
tensor<fp16, [6, 1, 448, 512]> k_cache1_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_92, begin_mask = k_cache1_internal_tensor_assign_5_begin_mask_0, end = concat_93, end_mask = k_cache1_internal_tensor_assign_5_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_5_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_5_stride_0, update = linear_33_cast_fp16, x = coreml_update_state_18)[name = string("k_cache1_internal_tensor_assign_5_cast_fp16")];
write_state(data = k_cache1_internal_tensor_assign_5_cast_fp16, input = k_cache1)[name = string("coreml_update_state_20_write_state")];
tensor<fp16, [6, 1, 448, 512]> coreml_update_state_20 = read_state(input = k_cache1)[name = string("coreml_update_state_20")];
tensor<int32, [4]> v_cache1_internal_tensor_assign_5_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_5_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
tensor<bool, [4]> v_cache1_internal_tensor_assign_5_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_5_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<bool, [4]> v_cache1_internal_tensor_assign_5_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_5_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
tensor<bool, [4]> v_cache1_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_5_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
tensor<fp16, [6, 1, 448, 512]> v_cache1_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_92, begin_mask = v_cache1_internal_tensor_assign_5_begin_mask_0, end = concat_93, end_mask = v_cache1_internal_tensor_assign_5_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_5_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_5_stride_0, update = linear_34_cast_fp16, x = coreml_update_state_19)[name = string("v_cache1_internal_tensor_assign_5_cast_fp16")];
write_state(data = v_cache1_internal_tensor_assign_5_cast_fp16, input = v_cache1)[name = string("coreml_update_state_21_write_state")];
tensor<fp16, [6, 1, 448, 512]> coreml_update_state_21 = read_state(input = v_cache1)[name = string("coreml_update_state_21")];
int32 concat_98_values0_0 = const()[name = string("concat_98_values0_0"), val = int32(1)];
int32 concat_98_values2_0 = const()[name = string("concat_98_values2_0"), val = int32(512)];
int32 concat_98_axis_0 = const()[name = string("concat_98_axis_0"), val = int32(0)];
bool concat_98_interleave_0 = const()[name = string("concat_98_interleave_0"), val = bool(false)];
tensor<int32, [3]> concat_98 = concat(axis = concat_98_axis_0, interleave = concat_98_interleave_0, values = (concat_98_values0_0, end_step_11, concat_98_values2_0))[name = string("concat_98")];
tensor<int32, [3]> var_976_begin_0 = const()[name = string("op_976_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
tensor<bool, [3]> var_976_end_mask_0 = const()[name = string("op_976_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
tensor<fp16, [1, ?, 512]> var_976_cast_fp16 = slice_by_index(begin = var_976_begin_0, end = concat_98, end_mask = var_976_end_mask_0, x = k_cache_17_cast_fp16)[name = string("op_976_cast_fp16")];
tensor<int32, [3]> var_979_begin_0 = const()[name = string("op_979_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
tensor<bool, [3]> var_979_end_mask_0 = const()[name = string("op_979_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
tensor<fp16, [1, ?, 512]> var_979_cast_fp16 = slice_by_index(begin = var_979_begin_0, end = concat_98, end_mask = var_979_end_mask_0, x = v_cache_17_cast_fp16)[name = string("op_979_cast_fp16")];
tensor<int32, [4]> concat_100x = const()[name = string("concat_100x"), val = tensor<int32, [4]>([1, -1, 8, 64])];
tensor<fp16, [1, ?, 8, 64]> var_989_cast_fp16 = reshape(shape = concat_100x, x = linear_32_cast_fp16)[name = string("op_989_cast_fp16")];
tensor<fp16, [1, 1, 1, 1]> const_46_to_fp16 = const()[name = string("const_46_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
tensor<fp16, [1, ?, 8, 64]> q_35_cast_fp16 = mul(x = var_989_cast_fp16, y = const_46_to_fp16)[name = string("q_35_cast_fp16")];
tensor<int32, [4]> concat_101x = const()[name = string("concat_101x"), val = tensor<int32, [4]>([1, -1, 8, 64])];
tensor<fp16, [1, ?, 8, 64]> var_996_cast_fp16 = reshape(shape = concat_101x, x = var_976_cast_fp16)[name = string("op_996_cast_fp16")];
tensor<fp16, [1, 1, 1, 1]> const_47_to_fp16 = const()[name = string("const_47_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
tensor<fp16, [1, ?, 8, 64]> k_45_cast_fp16 = mul(x = var_996_cast_fp16, y = const_47_to_fp16)[name = string("k_45_cast_fp16")];
tensor<int32, [4]> concat_102x = const()[name = string("concat_102x"), val = tensor<int32, [4]>([1, -1, 8, 64])];
tensor<fp16, [1, ?, 8, 64]> var_1003_cast_fp16 = reshape(shape = concat_102x, x = var_979_cast_fp16)[name = string("op_1003_cast_fp16")];
tensor<int32, [4]> var_1004 = const()[name = string("op_1004"), val = tensor<int32, [4]>([0, 2, 1, 3])];
bool qk_25_transpose_x_0 = const()[name = string("qk_25_transpose_x_0"), val = bool(false)];
bool qk_25_transpose_y_0 = const()[name = string("qk_25_transpose_y_0"), val = bool(false)];
tensor<int32, [4]> transpose_65_perm_0 = const()[name = string("transpose_65_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_66_perm_0 = const()[name = string("transpose_66_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 8, 64, ?]> transpose_66 = transpose(perm = transpose_66_perm_0, x = k_45_cast_fp16)[name = string("transpose_86")];
tensor<fp16, [1, 8, ?, 64]> transpose_65 = transpose(perm = transpose_65_perm_0, x = q_35_cast_fp16)[name = string("transpose_87")];
tensor<fp16, [1, 8, ?, ?]> qk_25_cast_fp16 = matmul(transpose_x = qk_25_transpose_x_0, transpose_y = qk_25_transpose_y_0, x = transpose_65, y = transpose_66)[name = string("qk_25_cast_fp16")];
int32 concat_103_values1_0 = const()[name = string("concat_103_values1_0"), val = int32(448)];
int32 concat_103_axis_0 = const()[name = string("concat_103_axis_0"), val = int32(0)];
bool concat_103_interleave_0 = const()[name = string("concat_103_interleave_0"), val = bool(false)];
tensor<int32, [2]> concat_103 = concat(axis = concat_103_axis_0, interleave = concat_103_interleave_0, values = (gather_50_cast_uint16_to_int32, concat_103_values1_0))[name = string("concat_103")];
tensor<int32, [2]> var_1007_begin_0 = const()[name = string("op_1007_begin_0"), val = tensor<int32, [2]>([0, 0])];
tensor<bool, [2]> var_1007_end_mask_0 = const()[name = string("op_1007_end_mask_0"), val = tensor<bool, [2]>([false, true])];
tensor<fp16, [?, 448]> var_1007_cast_fp16 = slice_by_index(begin = var_1007_begin_0, end = concat_103, end_mask = var_1007_end_mask_0, x = mask_to_fp16)[name = string("op_1007_cast_fp16")];
int32 concat_104_values0_0 = const()[name = string("concat_104_values0_0"), val = int32(0)];
int32 concat_104_axis_0 = const()[name = string("concat_104_axis_0"), val = int32(0)];
bool concat_104_interleave_0 = const()[name = string("concat_104_interleave_0"), val = bool(false)];
tensor<int32, [2]> concat_104 = concat(axis = concat_104_axis_0, interleave = concat_104_interleave_0, values = (concat_104_values0_0, gather_50_cast_uint16_to_int32))[name = string("concat_104")];
tensor<int32, [2]> var_1008_begin_0 = const()[name = string("op_1008_begin_0"), val = tensor<int32, [2]>([0, 0])];
tensor<bool, [2]> var_1008_end_mask_0 = const()[name = string("op_1008_end_mask_0"), val = tensor<bool, [2]>([true, false])];
tensor<fp16, [?, ?]> var_1008_cast_fp16 = slice_by_index(begin = var_1008_begin_0, end = concat_104, end_mask = var_1008_end_mask_0, x = var_1007_cast_fp16)[name = string("op_1008_cast_fp16")];
tensor<fp16, [1, 8, ?, ?]> qk_27_cast_fp16 = add(x = qk_25_cast_fp16, y = var_1008_cast_fp16)[name = string("qk_27_cast_fp16")];
tensor<fp16, [1, 8, ?, ?]> var_1011_cast_fp16 = softmax(axis = var_920, x = qk_27_cast_fp16)[name = string("op_1011_cast_fp16")];
bool var_1013_transpose_x_0 = const()[name = string("op_1013_transpose_x_0"), val = bool(false)];
bool var_1013_transpose_y_0 = const()[name = string("op_1013_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 8, ?, 64]> v_45_cast_fp16 = transpose(perm = var_1004, x = var_1003_cast_fp16)[name = string("transpose_88")];
tensor<fp16, [1, 8, ?, 64]> var_1013_cast_fp16 = matmul(transpose_x = var_1013_transpose_x_0, transpose_y = var_1013_transpose_y_0, x = var_1011_cast_fp16, y = v_45_cast_fp16)[name = string("op_1013_cast_fp16")];
tensor<int32, [4]> var_1014 = const()[name = string("op_1014"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> concat_105x = const()[name = string("concat_105x"), val = tensor<int32, [3]>([1, -1, 512])];
tensor<fp16, [1, ?, 8, 64]> var_1015_cast_fp16 = transpose(perm = var_1014, x = var_1013_cast_fp16)[name = string("transpose_85")];
tensor<fp16, [1, ?, 512]> x_79_cast_fp16 = reshape(shape = concat_105x, x = var_1015_cast_fp16)[name = string("x_79_cast_fp16")];
tensor<fp16, [512, 512]> var_1019_to_fp16 = const()[name = string("op_1019_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86515776)))];
tensor<fp16, [512]> var_1020_to_fp16 = const()[name = string("op_1020_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87040128)))];
tensor<fp16, [1, ?, 512]> linear_35_cast_fp16 = linear(bias = var_1020_to_fp16, weight = var_1019_to_fp16, x = x_79_cast_fp16)[name = string("linear_35_cast_fp16")];
tensor<fp16, [1, ?, 512]> x_81_cast_fp16 = add(x = x_75_cast_fp16, y = linear_35_cast_fp16)[name = string("x_81_cast_fp16")];
tensor<int32, [1]> var_1027_axes_0 = const()[name = string("op_1027_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> blocks_4_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_4_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87041216)))];
tensor<fp16, [512]> blocks_4_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_4_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87042304)))];
tensor<fp16, [1, ?, 512]> var_1027_cast_fp16 = layer_norm(axes = var_1027_axes_0, beta = blocks_4_cross_attn_ln_bias_to_fp16, epsilon = var_926_to_fp16, gamma = blocks_4_cross_attn_ln_weight_to_fp16, x = x_81_cast_fp16)[name = string("op_1027_cast_fp16")];
tensor<fp16, [512, 512]> var_1036_to_fp16 = const()[name = string("op_1036_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87043392)))];
tensor<fp16, [512]> var_1037_to_fp16 = const()[name = string("op_1037_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87567744)))];
tensor<fp16, [1, ?, 512]> linear_36_cast_fp16 = linear(bias = var_1037_to_fp16, weight = var_1036_to_fp16, x = var_1027_cast_fp16)[name = string("linear_36_cast_fp16")];
tensor<int32, [3]> concat_106 = const()[name = string("concat_106"), val = tensor<int32, [3]>([0, 0, 0])];
tensor<int32, [3]> concat_107 = const()[name = string("concat_107"), val = tensor<int32, [3]>([0, 1500, 0])];
tensor<int32, [3]> k_47_internal_tensor_assign_1_stride_0 = const()[name = string("k_47_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
tensor<bool, [3]> k_47_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_47_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
tensor<bool, [3]> k_47_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_47_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
tensor<bool, [3]> k_47_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_47_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
tensor<fp16, [1, 1500, 512]> k_47_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_106, begin_mask = k_47_internal_tensor_assign_1_begin_mask_0, end = concat_107, end_mask = k_47_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_47_internal_tensor_assign_1_squeeze_mask_0, stride = k_47_internal_tensor_assign_1_stride_0, update = k_cache_19_cast_fp16, x = k_7_to_fp16)[name = string("k_47_internal_tensor_assign_1_cast_fp16")];
tensor<int32, [3]> concat_108 = const()[name = string("concat_108"), val = tensor<int32, [3]>([0, 0, 0])];
tensor<int32, [3]> concat_109 = const()[name = string("concat_109"), val = tensor<int32, [3]>([0, 1500, 0])];
tensor<int32, [3]> v_47_internal_tensor_assign_1_stride_0 = const()[name = string("v_47_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
tensor<bool, [3]> v_47_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_47_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
tensor<bool, [3]> v_47_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_47_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
tensor<bool, [3]> v_47_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_47_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
tensor<fp16, [1, 1500, 512]> v_47_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_108, begin_mask = v_47_internal_tensor_assign_1_begin_mask_0, end = concat_109, end_mask = v_47_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_47_internal_tensor_assign_1_squeeze_mask_0, stride = v_47_internal_tensor_assign_1_stride_0, update = v_cache_19_cast_fp16, x = k_7_to_fp16)[name = string("v_47_internal_tensor_assign_1_cast_fp16")];
tensor<int32, [4]> concat_110x = const()[name = string("concat_110x"), val = tensor<int32, [4]>([1, -1, 8, 64])];
tensor<fp16, [1, ?, 8, 64]> var_1057_cast_fp16 = reshape(shape = concat_110x, x = linear_36_cast_fp16)[name = string("op_1057_cast_fp16")];
tensor<fp16, [1, 1, 1, 1]> const_48_to_fp16 = const()[name = string("const_48_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
tensor<fp16, [1, ?, 8, 64]> q_39_cast_fp16 = mul(x = var_1057_cast_fp16, y = const_48_to_fp16)[name = string("q_39_cast_fp16")];
tensor<int32, [4]> var_1063 = const()[name = string("op_1063"), val = tensor<int32, [4]>([1, 1500, 8, -1])];
tensor<fp16, [1, 1500, 8, 64]> var_1064_cast_fp16 = reshape(shape = var_1063, x = k_47_internal_tensor_assign_1_cast_fp16)[name = string("op_1064_cast_fp16")];
tensor<fp16, [1, 1, 1, 1]> const_49_to_fp16 = const()[name = string("const_49_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
tensor<fp16, [1, 1500, 8, 64]> k_49_cast_fp16 = mul(x = var_1064_cast_fp16, y = const_49_to_fp16)[name = string("k_49_cast_fp16")];
tensor<int32, [4]> var_1070 = const()[name = string("op_1070"), val = tensor<int32, [4]>([1, 1500, 8, -1])];
tensor<fp16, [1, 1500, 8, 64]> var_1071_cast_fp16 = reshape(shape = var_1070, x = v_47_internal_tensor_assign_1_cast_fp16)[name = string("op_1071_cast_fp16")];
tensor<int32, [4]> var_1072 = const()[name = string("op_1072"), val = tensor<int32, [4]>([0, 2, 1, 3])];
bool qk_29_transpose_x_0 = const()[name = string("qk_29_transpose_x_0"), val = bool(false)];
bool qk_29_transpose_y_0 = const()[name = string("qk_29_transpose_y_0"), val = bool(false)];
tensor<int32, [4]> transpose_67_perm_0 = const()[name = string("transpose_67_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_68_perm_0 = const()[name = string("transpose_68_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 8, 64, 1500]> transpose_68 = transpose(perm = transpose_68_perm_0, x = k_49_cast_fp16)[name = string("transpose_82")];
tensor<fp16, [1, 8, ?, 64]> transpose_67 = transpose(perm = transpose_67_perm_0, x = q_39_cast_fp16)[name = string("transpose_83")];
tensor<fp16, [1, 8, ?, 1500]> qk_29_cast_fp16 = matmul(transpose_x = qk_29_transpose_x_0, transpose_y = qk_29_transpose_y_0, x = transpose_67, y = transpose_68)[name = string("qk_29_cast_fp16")];
tensor<fp16, [1, 8, ?, 1500]> var_1076_cast_fp16 = softmax(axis = var_920, x = qk_29_cast_fp16)[name = string("op_1076_cast_fp16")];
bool var_1078_transpose_x_0 = const()[name = string("op_1078_transpose_x_0"), val = bool(false)];
bool var_1078_transpose_y_0 = const()[name = string("op_1078_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 8, 1500, 64]> v_49_cast_fp16 = transpose(perm = var_1072, x = var_1071_cast_fp16)[name = string("transpose_84")];
tensor<fp16, [1, 8, ?, 64]> var_1078_cast_fp16 = matmul(transpose_x = var_1078_transpose_x_0, transpose_y = var_1078_transpose_y_0, x = var_1076_cast_fp16, y = v_49_cast_fp16)[name = string("op_1078_cast_fp16")];
tensor<int32, [4]> var_1079 = const()[name = string("op_1079"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> concat_111x = const()[name = string("concat_111x"), val = tensor<int32, [3]>([1, -1, 512])];
tensor<fp16, [1, ?, 8, 64]> var_1080_cast_fp16 = transpose(perm = var_1079, x = var_1078_cast_fp16)[name = string("transpose_81")];
tensor<fp16, [1, ?, 512]> x_85_cast_fp16 = reshape(shape = concat_111x, x = var_1080_cast_fp16)[name = string("x_85_cast_fp16")];
tensor<fp16, [512, 512]> var_1084_to_fp16 = const()[name = string("op_1084_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87568832)))];
tensor<fp16, [512]> var_1085_to_fp16 = const()[name = string("op_1085_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88093184)))];
tensor<fp16, [1, ?, 512]> linear_37_cast_fp16 = linear(bias = var_1085_to_fp16, weight = var_1084_to_fp16, x = x_85_cast_fp16)[name = string("linear_37_cast_fp16")];
tensor<fp16, [1, ?, 512]> x_87_cast_fp16 = add(x = x_81_cast_fp16, y = linear_37_cast_fp16)[name = string("x_87_cast_fp16")];
tensor<int32, [1]> var_1092_axes_0 = const()[name = string("op_1092_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> blocks_4_mlp_ln_weight_to_fp16 = const()[name = string("blocks_4_mlp_ln_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88094272)))];
tensor<fp16, [512]> blocks_4_mlp_ln_bias_to_fp16 = const()[name = string("blocks_4_mlp_ln_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88095360)))];
tensor<fp16, [1, ?, 512]> var_1092_cast_fp16 = layer_norm(axes = var_1092_axes_0, beta = blocks_4_mlp_ln_bias_to_fp16, epsilon = var_926_to_fp16, gamma = blocks_4_mlp_ln_weight_to_fp16, x = x_87_cast_fp16)[name = string("op_1092_cast_fp16")];
tensor<fp16, [2048, 512]> var_1101_to_fp16 = const()[name = string("op_1101_to_fp16"), val = tensor<fp16, [2048, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88096448)))];
tensor<fp16, [2048]> var_1102_to_fp16 = const()[name = string("op_1102_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90193664)))];
tensor<fp16, [1, ?, 2048]> linear_38_cast_fp16 = linear(bias = var_1102_to_fp16, weight = var_1101_to_fp16, x = var_1092_cast_fp16)[name = string("linear_38_cast_fp16")];
string x_91_mode_0 = const()[name = string("x_91_mode_0"), val = string("EXACT")];
tensor<fp16, [1, ?, 2048]> x_91_cast_fp16 = gelu(mode = x_91_mode_0, x = linear_38_cast_fp16)[name = string("x_91_cast_fp16")];
tensor<fp16, [512, 2048]> var_1107_to_fp16 = const()[name = string("op_1107_to_fp16"), val = tensor<fp16, [512, 2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90197824)))];
tensor<fp16, [512]> var_1108_to_fp16 = const()[name = string("op_1108_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92295040)))];
tensor<fp16, [1, ?, 512]> linear_39_cast_fp16 = linear(bias = var_1108_to_fp16, weight = var_1107_to_fp16, x = x_91_cast_fp16)[name = string("linear_39_cast_fp16")];
tensor<fp16, [1, ?, 512]> x_93_cast_fp16 = add(x = x_87_cast_fp16, y = linear_39_cast_fp16)[name = string("x_93_cast_fp16")];
tensor<int32, [4]> k_cache_21_begin_0 = const()[name = string("k_cache_21_begin_0"), val = tensor<int32, [4]>([5, 0, 0, 0])];
tensor<int32, [4]> k_cache_21_end_0 = const()[name = string("k_cache_21_end_0"), val = tensor<int32, [4]>([6, 1, 448, 512])];
tensor<bool, [4]> k_cache_21_end_mask_0 = const()[name = string("k_cache_21_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<bool, [4]> k_cache_21_squeeze_mask_0 = const()[name = string("k_cache_21_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
tensor<fp16, [1, 448, 512]> k_cache_21_cast_fp16 = slice_by_index(begin = k_cache_21_begin_0, end = k_cache_21_end_0, end_mask = k_cache_21_end_mask_0, squeeze_mask = k_cache_21_squeeze_mask_0, x = coreml_update_state_20)[name = string("k_cache_21_cast_fp16")];
tensor<int32, [4]> v_cache_21_begin_0 = const()[name = string("v_cache_21_begin_0"), val = tensor<int32, [4]>([5, 0, 0, 0])];
tensor<int32, [4]> v_cache_21_end_0 = const()[name = string("v_cache_21_end_0"), val = tensor<int32, [4]>([6, 1, 448, 512])];
tensor<bool, [4]> v_cache_21_end_mask_0 = const()[name = string("v_cache_21_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<bool, [4]> v_cache_21_squeeze_mask_0 = const()[name = string("v_cache_21_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
tensor<fp16, [1, 448, 512]> v_cache_21_cast_fp16 = slice_by_index(begin = v_cache_21_begin_0, end = v_cache_21_end_0, end_mask = v_cache_21_end_mask_0, squeeze_mask = v_cache_21_squeeze_mask_0, x = coreml_update_state_21)[name = string("v_cache_21_cast_fp16")];
tensor<int32, [4]> k_cache_begin_0 = const()[name = string("k_cache_begin_0"), val = tensor<int32, [4]>([5, 0, 0, 0])];
tensor<int32, [4]> k_cache_end_0 = const()[name = string("k_cache_end_0"), val = tensor<int32, [4]>([6, 1, 1500, 512])];
tensor<bool, [4]> k_cache_end_mask_0 = const()[name = string("k_cache_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<bool, [4]> k_cache_squeeze_mask_0 = const()[name = string("k_cache_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
tensor<fp16, [1, 1500, 512]> k_cache_cast_fp16 = slice_by_index(begin = k_cache_begin_0, end = k_cache_end_0, end_mask = k_cache_end_mask_0, squeeze_mask = k_cache_squeeze_mask_0, x = read_state_2)[name = string("k_cache_cast_fp16")];
tensor<int32, [4]> v_cache_begin_0 = const()[name = string("v_cache_begin_0"), val = tensor<int32, [4]>([5, 0, 0, 0])];
tensor<int32, [4]> v_cache_end_0 = const()[name = string("v_cache_end_0"), val = tensor<int32, [4]>([6, 1, 1500, 512])];
tensor<bool, [4]> v_cache_end_mask_0 = const()[name = string("v_cache_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<bool, [4]> v_cache_squeeze_mask_0 = const()[name = string("v_cache_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
tensor<fp16, [1, 1500, 512]> v_cache_cast_fp16 = slice_by_index(begin = v_cache_begin_0, end = v_cache_end_0, end_mask = v_cache_end_mask_0, squeeze_mask = v_cache_squeeze_mask_0, x = read_state_3)[name = string("v_cache_cast_fp16")];
int32 var_1131 = const()[name = string("op_1131"), val = int32(-1)];
tensor<int32, [1]> var_1149_axes_0 = const()[name = string("op_1149_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> blocks_5_attn_ln_weight_to_fp16 = const()[name = string("blocks_5_attn_ln_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92296128)))];
tensor<fp16, [512]> blocks_5_attn_ln_bias_to_fp16 = const()[name = string("blocks_5_attn_ln_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92297216)))];
fp16 var_1137_to_fp16 = const()[name = string("op_1137_to_fp16"), val = fp16(0x1.5p-17)];
tensor<fp16, [1, ?, 512]> var_1149_cast_fp16 = layer_norm(axes = var_1149_axes_0, beta = blocks_5_attn_ln_bias_to_fp16, epsilon = var_1137_to_fp16, gamma = blocks_5_attn_ln_weight_to_fp16, x = x_93_cast_fp16)[name = string("op_1149_cast_fp16")];
tensor<fp16, [512, 512]> var_1160_to_fp16 = const()[name = string("op_1160_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92298304)))];
tensor<fp16, [512]> var_1161_to_fp16 = const()[name = string("op_1161_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92822656)))];
tensor<fp16, [1, ?, 512]> linear_40_cast_fp16 = linear(bias = var_1161_to_fp16, weight = var_1160_to_fp16, x = var_1149_cast_fp16)[name = string("linear_40_cast_fp16")];
tensor<fp16, [512, 512]> var_1164_to_fp16 = const()[name = string("op_1164_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92823744)))];
tensor<fp16, [1, ?, 512]> linear_41_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1164_to_fp16, x = var_1149_cast_fp16)[name = string("linear_41_cast_fp16")];
tensor<fp16, [512, 512]> var_1168_to_fp16 = const()[name = string("op_1168_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93348096)))];
tensor<fp16, [512]> var_1169_to_fp16 = const()[name = string("op_1169_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93872448)))];
tensor<fp16, [1, ?, 512]> linear_42_cast_fp16 = linear(bias = var_1169_to_fp16, weight = var_1168_to_fp16, x = var_1149_cast_fp16)[name = string("linear_42_cast_fp16")];
tensor<int32, [3]> var_1171_shape_cast_fp16 = shape(x = linear_40_cast_fp16)[name = string("op_1171_shape_cast_fp16")];
int32 gather_62_axis_0 = const()[name = string("gather_62_axis_0"), val = int32(0)];
int32 gather_62_batch_dims_0 = const()[name = string("gather_62_batch_dims_0"), val = int32(0)];
bool gather_62_validate_indices_0 = const()[name = string("gather_62_validate_indices_0"), val = bool(false)];
string var_1171_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1171_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
uint16 select_62_to_uint16 = const()[name = string("select_62_to_uint16"), val = uint16(1)];
tensor<uint16, [3]> var_1171_shape_cast_fp16_to_uint16 = cast(dtype = var_1171_shape_cast_fp16_to_uint16_dtype_0, x = var_1171_shape_cast_fp16)[name = string("cast_68")];
uint16 gather_62_cast_uint16 = gather(axis = gather_62_axis_0, batch_dims = gather_62_batch_dims_0, indices = select_62_to_uint16, validate_indices = gather_62_validate_indices_0, x = var_1171_shape_cast_fp16_to_uint16)[name = string("gather_62_cast_uint16")];
string gather_62_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_62_cast_uint16_to_int32_dtype_0"), val = string("int32")];
int32 gather_62_cast_uint16_to_int32 = cast(dtype = gather_62_cast_uint16_to_int32_dtype_0, x = gather_62_cast_uint16)[name = string("cast_67")];
int32 end_step = add(x = offset, y = gather_62_cast_uint16_to_int32)[name = string("end_step")];
tensor<int32, [1]> expand_dims_80 = const()[name = string("expand_dims_80"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_83_axes_0 = const()[name = string("expand_dims_83_axes_0"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_83 = expand_dims(axes = expand_dims_83_axes_0, x = end_step)[name = string("expand_dims_83")];
tensor<int32, [1]> concat_114_values0_0 = const()[name = string("concat_114_values0_0"), val = tensor<int32, [1]>([5])];
int32 concat_114_axis_0 = const()[name = string("concat_114_axis_0"), val = int32(0)];
bool concat_114_interleave_0 = const()[name = string("concat_114_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_114 = concat(axis = concat_114_axis_0, interleave = concat_114_interleave_0, values = (concat_114_values0_0, expand_dims_80, expand_dims_1, expand_dims_82))[name = string("concat_114")];
tensor<int32, [1]> concat_115_values0_0 = const()[name = string("concat_115_values0_0"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> concat_115_values1_0 = const()[name = string("concat_115_values1_0"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> concat_115_values3_0 = const()[name = string("concat_115_values3_0"), val = tensor<int32, [1]>([0])];
int32 concat_115_axis_0 = const()[name = string("concat_115_axis_0"), val = int32(0)];
bool concat_115_interleave_0 = const()[name = string("concat_115_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_115 = concat(axis = concat_115_axis_0, interleave = concat_115_interleave_0, values = (concat_115_values0_0, concat_115_values1_0, expand_dims_83, concat_115_values3_0))[name = string("concat_115")];
tensor<int32, [4]> k_cache1_internal_tensor_assign_6_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_6_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
tensor<bool, [4]> k_cache1_internal_tensor_assign_6_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_6_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<bool, [4]> k_cache1_internal_tensor_assign_6_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_6_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
tensor<bool, [4]> k_cache1_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_6_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
tensor<fp16, [6, 1, 448, 512]> k_cache1_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_114, begin_mask = k_cache1_internal_tensor_assign_6_begin_mask_0, end = concat_115, end_mask = k_cache1_internal_tensor_assign_6_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_6_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_6_stride_0, update = linear_41_cast_fp16, x = coreml_update_state_20)[name = string("k_cache1_internal_tensor_assign_6_cast_fp16")];
write_state(data = k_cache1_internal_tensor_assign_6_cast_fp16, input = k_cache1)[name = string("coreml_update_state_22_write_state")];
tensor<int32, [4]> v_cache1_internal_tensor_assign_6_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_6_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
tensor<bool, [4]> v_cache1_internal_tensor_assign_6_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_6_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<bool, [4]> v_cache1_internal_tensor_assign_6_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_6_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
tensor<bool, [4]> v_cache1_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_6_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
tensor<fp16, [6, 1, 448, 512]> v_cache1_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_114, begin_mask = v_cache1_internal_tensor_assign_6_begin_mask_0, end = concat_115, end_mask = v_cache1_internal_tensor_assign_6_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_6_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_6_stride_0, update = linear_42_cast_fp16, x = coreml_update_state_21)[name = string("v_cache1_internal_tensor_assign_6_cast_fp16")];
write_state(data = v_cache1_internal_tensor_assign_6_cast_fp16, input = v_cache1)[name = string("coreml_update_state_23_write_state")];
int32 concat_120_values0_0 = const()[name = string("concat_120_values0_0"), val = int32(1)];
int32 concat_120_values2_0 = const()[name = string("concat_120_values2_0"), val = int32(512)];
int32 concat_120_axis_0 = const()[name = string("concat_120_axis_0"), val = int32(0)];
bool concat_120_interleave_0 = const()[name = string("concat_120_interleave_0"), val = bool(false)];
tensor<int32, [3]> concat_120 = concat(axis = concat_120_axis_0, interleave = concat_120_interleave_0, values = (concat_120_values0_0, end_step, concat_120_values2_0))[name = string("concat_120")];
tensor<int32, [3]> var_1187_begin_0 = const()[name = string("op_1187_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
tensor<bool, [3]> var_1187_end_mask_0 = const()[name = string("op_1187_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
tensor<fp16, [1, ?, 512]> var_1187_cast_fp16 = slice_by_index(begin = var_1187_begin_0, end = concat_120, end_mask = var_1187_end_mask_0, x = k_cache_21_cast_fp16)[name = string("op_1187_cast_fp16")];
tensor<int32, [3]> var_1190_begin_0 = const()[name = string("op_1190_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
tensor<bool, [3]> var_1190_end_mask_0 = const()[name = string("op_1190_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
tensor<fp16, [1, ?, 512]> var_1190_cast_fp16 = slice_by_index(begin = var_1190_begin_0, end = concat_120, end_mask = var_1190_end_mask_0, x = v_cache_21_cast_fp16)[name = string("op_1190_cast_fp16")];
tensor<int32, [4]> concat_122x = const()[name = string("concat_122x"), val = tensor<int32, [4]>([1, -1, 8, 64])];
tensor<fp16, [1, ?, 8, 64]> var_1200_cast_fp16 = reshape(shape = concat_122x, x = linear_40_cast_fp16)[name = string("op_1200_cast_fp16")];
tensor<fp16, [1, 1, 1, 1]> const_50_to_fp16 = const()[name = string("const_50_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
tensor<fp16, [1, ?, 8, 64]> q_43_cast_fp16 = mul(x = var_1200_cast_fp16, y = const_50_to_fp16)[name = string("q_43_cast_fp16")];
tensor<int32, [4]> concat_123x = const()[name = string("concat_123x"), val = tensor<int32, [4]>([1, -1, 8, 64])];
tensor<fp16, [1, ?, 8, 64]> var_1207_cast_fp16 = reshape(shape = concat_123x, x = var_1187_cast_fp16)[name = string("op_1207_cast_fp16")];
tensor<fp16, [1, 1, 1, 1]> const_51_to_fp16 = const()[name = string("const_51_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
tensor<fp16, [1, ?, 8, 64]> k_55_cast_fp16 = mul(x = var_1207_cast_fp16, y = const_51_to_fp16)[name = string("k_55_cast_fp16")];
tensor<int32, [4]> concat_124x = const()[name = string("concat_124x"), val = tensor<int32, [4]>([1, -1, 8, 64])];
tensor<fp16, [1, ?, 8, 64]> var_1214_cast_fp16 = reshape(shape = concat_124x, x = var_1190_cast_fp16)[name = string("op_1214_cast_fp16")];
tensor<int32, [4]> var_1215 = const()[name = string("op_1215"), val = tensor<int32, [4]>([0, 2, 1, 3])];
bool qk_31_transpose_x_0 = const()[name = string("qk_31_transpose_x_0"), val = bool(false)];
bool qk_31_transpose_y_0 = const()[name = string("qk_31_transpose_y_0"), val = bool(false)];
tensor<int32, [4]> transpose_69_perm_0 = const()[name = string("transpose_69_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_70_perm_0 = const()[name = string("transpose_70_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 8, 64, ?]> transpose_70 = transpose(perm = transpose_70_perm_0, x = k_55_cast_fp16)[name = string("transpose_78")];
tensor<fp16, [1, 8, ?, 64]> transpose_69 = transpose(perm = transpose_69_perm_0, x = q_43_cast_fp16)[name = string("transpose_79")];
tensor<fp16, [1, 8, ?, ?]> qk_31_cast_fp16 = matmul(transpose_x = qk_31_transpose_x_0, transpose_y = qk_31_transpose_y_0, x = transpose_69, y = transpose_70)[name = string("qk_31_cast_fp16")];
int32 concat_125_values1_0 = const()[name = string("concat_125_values1_0"), val = int32(448)];
int32 concat_125_axis_0 = const()[name = string("concat_125_axis_0"), val = int32(0)];
bool concat_125_interleave_0 = const()[name = string("concat_125_interleave_0"), val = bool(false)];
tensor<int32, [2]> concat_125 = concat(axis = concat_125_axis_0, interleave = concat_125_interleave_0, values = (gather_62_cast_uint16_to_int32, concat_125_values1_0))[name = string("concat_125")];
tensor<int32, [2]> var_1218_begin_0 = const()[name = string("op_1218_begin_0"), val = tensor<int32, [2]>([0, 0])];
tensor<bool, [2]> var_1218_end_mask_0 = const()[name = string("op_1218_end_mask_0"), val = tensor<bool, [2]>([false, true])];
tensor<fp16, [?, 448]> var_1218_cast_fp16 = slice_by_index(begin = var_1218_begin_0, end = concat_125, end_mask = var_1218_end_mask_0, x = mask_to_fp16)[name = string("op_1218_cast_fp16")];
int32 concat_126_values0_0 = const()[name = string("concat_126_values0_0"), val = int32(0)];
int32 concat_126_axis_0 = const()[name = string("concat_126_axis_0"), val = int32(0)];
bool concat_126_interleave_0 = const()[name = string("concat_126_interleave_0"), val = bool(false)];
tensor<int32, [2]> concat_126 = concat(axis = concat_126_axis_0, interleave = concat_126_interleave_0, values = (concat_126_values0_0, gather_62_cast_uint16_to_int32))[name = string("concat_126")];
tensor<int32, [2]> var_1219_begin_0 = const()[name = string("op_1219_begin_0"), val = tensor<int32, [2]>([0, 0])];
tensor<bool, [2]> var_1219_end_mask_0 = const()[name = string("op_1219_end_mask_0"), val = tensor<bool, [2]>([true, false])];
tensor<fp16, [?, ?]> var_1219_cast_fp16 = slice_by_index(begin = var_1219_begin_0, end = concat_126, end_mask = var_1219_end_mask_0, x = var_1218_cast_fp16)[name = string("op_1219_cast_fp16")];
tensor<fp16, [1, 8, ?, ?]> qk_33_cast_fp16 = add(x = qk_31_cast_fp16, y = var_1219_cast_fp16)[name = string("qk_33_cast_fp16")];
tensor<fp16, [1, 8, ?, ?]> var_1222_cast_fp16 = softmax(axis = var_1131, x = qk_33_cast_fp16)[name = string("op_1222_cast_fp16")];
bool var_1224_transpose_x_0 = const()[name = string("op_1224_transpose_x_0"), val = bool(false)];
bool var_1224_transpose_y_0 = const()[name = string("op_1224_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 8, ?, 64]> v_55_cast_fp16 = transpose(perm = var_1215, x = var_1214_cast_fp16)[name = string("transpose_80")];
tensor<fp16, [1, 8, ?, 64]> var_1224_cast_fp16 = matmul(transpose_x = var_1224_transpose_x_0, transpose_y = var_1224_transpose_y_0, x = var_1222_cast_fp16, y = v_55_cast_fp16)[name = string("op_1224_cast_fp16")];
tensor<int32, [4]> var_1225 = const()[name = string("op_1225"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> concat_127x = const()[name = string("concat_127x"), val = tensor<int32, [3]>([1, -1, 512])];
tensor<fp16, [1, ?, 8, 64]> var_1226_cast_fp16 = transpose(perm = var_1225, x = var_1224_cast_fp16)[name = string("transpose_77")];
tensor<fp16, [1, ?, 512]> x_97_cast_fp16 = reshape(shape = concat_127x, x = var_1226_cast_fp16)[name = string("x_97_cast_fp16")];
tensor<fp16, [512, 512]> var_1230_to_fp16 = const()[name = string("op_1230_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93873536)))];
tensor<fp16, [512]> var_1231_to_fp16 = const()[name = string("op_1231_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94397888)))];
tensor<fp16, [1, ?, 512]> linear_43_cast_fp16 = linear(bias = var_1231_to_fp16, weight = var_1230_to_fp16, x = x_97_cast_fp16)[name = string("linear_43_cast_fp16")];
tensor<fp16, [1, ?, 512]> x_99_cast_fp16 = add(x = x_93_cast_fp16, y = linear_43_cast_fp16)[name = string("x_99_cast_fp16")];
tensor<int32, [1]> var_1238_axes_0 = const()[name = string("op_1238_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> blocks_5_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_5_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94398976)))];
tensor<fp16, [512]> blocks_5_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_5_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94400064)))];
tensor<fp16, [1, ?, 512]> var_1238_cast_fp16 = layer_norm(axes = var_1238_axes_0, beta = blocks_5_cross_attn_ln_bias_to_fp16, epsilon = var_1137_to_fp16, gamma = blocks_5_cross_attn_ln_weight_to_fp16, x = x_99_cast_fp16)[name = string("op_1238_cast_fp16")];
tensor<fp16, [512, 512]> var_1247_to_fp16 = const()[name = string("op_1247_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94401152)))];
tensor<fp16, [512]> var_1248_to_fp16 = const()[name = string("op_1248_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94925504)))];
tensor<fp16, [1, ?, 512]> linear_44_cast_fp16 = linear(bias = var_1248_to_fp16, weight = var_1247_to_fp16, x = var_1238_cast_fp16)[name = string("linear_44_cast_fp16")];
tensor<int32, [3]> concat_128 = const()[name = string("concat_128"), val = tensor<int32, [3]>([0, 0, 0])];
tensor<int32, [3]> concat_129 = const()[name = string("concat_129"), val = tensor<int32, [3]>([0, 1500, 0])];
tensor<int32, [3]> k_57_internal_tensor_assign_1_stride_0 = const()[name = string("k_57_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
tensor<bool, [3]> k_57_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_57_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
tensor<bool, [3]> k_57_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_57_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
tensor<bool, [3]> k_57_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_57_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
tensor<fp16, [1, 1500, 512]> k_57_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_128, begin_mask = k_57_internal_tensor_assign_1_begin_mask_0, end = concat_129, end_mask = k_57_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_57_internal_tensor_assign_1_squeeze_mask_0, stride = k_57_internal_tensor_assign_1_stride_0, update = k_cache_cast_fp16, x = k_7_to_fp16)[name = string("k_57_internal_tensor_assign_1_cast_fp16")];
tensor<int32, [3]> concat_130 = const()[name = string("concat_130"), val = tensor<int32, [3]>([0, 0, 0])];
tensor<int32, [3]> concat_131 = const()[name = string("concat_131"), val = tensor<int32, [3]>([0, 1500, 0])];
tensor<int32, [3]> v_57_internal_tensor_assign_1_stride_0 = const()[name = string("v_57_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
tensor<bool, [3]> v_57_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_57_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
tensor<bool, [3]> v_57_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_57_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
tensor<bool, [3]> v_57_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_57_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
tensor<fp16, [1, 1500, 512]> v_57_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_130, begin_mask = v_57_internal_tensor_assign_1_begin_mask_0, end = concat_131, end_mask = v_57_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_57_internal_tensor_assign_1_squeeze_mask_0, stride = v_57_internal_tensor_assign_1_stride_0, update = v_cache_cast_fp16, x = k_7_to_fp16)[name = string("v_57_internal_tensor_assign_1_cast_fp16")];
tensor<int32, [4]> concat_132x = const()[name = string("concat_132x"), val = tensor<int32, [4]>([1, -1, 8, 64])];
tensor<fp16, [1, ?, 8, 64]> var_1268_cast_fp16 = reshape(shape = concat_132x, x = linear_44_cast_fp16)[name = string("op_1268_cast_fp16")];
tensor<fp16, [1, 1, 1, 1]> const_52_to_fp16 = const()[name = string("const_52_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
tensor<fp16, [1, ?, 8, 64]> q_cast_fp16 = mul(x = var_1268_cast_fp16, y = const_52_to_fp16)[name = string("q_cast_fp16")];
tensor<int32, [4]> var_1274 = const()[name = string("op_1274"), val = tensor<int32, [4]>([1, 1500, 8, -1])];
tensor<fp16, [1, 1500, 8, 64]> var_1275_cast_fp16 = reshape(shape = var_1274, x = k_57_internal_tensor_assign_1_cast_fp16)[name = string("op_1275_cast_fp16")];
tensor<fp16, [1, 1, 1, 1]> const_53_to_fp16 = const()[name = string("const_53_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
tensor<fp16, [1, 1500, 8, 64]> k_cast_fp16 = mul(x = var_1275_cast_fp16, y = const_53_to_fp16)[name = string("k_cast_fp16")];
tensor<int32, [4]> var_1281 = const()[name = string("op_1281"), val = tensor<int32, [4]>([1, 1500, 8, -1])];
tensor<fp16, [1, 1500, 8, 64]> var_1282_cast_fp16 = reshape(shape = var_1281, x = v_57_internal_tensor_assign_1_cast_fp16)[name = string("op_1282_cast_fp16")];
tensor<int32, [4]> var_1283 = const()[name = string("op_1283"), val = tensor<int32, [4]>([0, 2, 1, 3])];
bool qk_transpose_x_0 = const()[name = string("qk_transpose_x_0"), val = bool(false)];
bool qk_transpose_y_0 = const()[name = string("qk_transpose_y_0"), val = bool(false)];
tensor<int32, [4]> transpose_71_perm_0 = const()[name = string("transpose_71_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_72_perm_0 = const()[name = string("transpose_72_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 8, 64, 1500]> transpose_72 = transpose(perm = transpose_72_perm_0, x = k_cast_fp16)[name = string("transpose_74")];
tensor<fp16, [1, 8, ?, 64]> transpose_71 = transpose(perm = transpose_71_perm_0, x = q_cast_fp16)[name = string("transpose_75")];
tensor<fp16, [1, 8, ?, 1500]> qk_cast_fp16 = matmul(transpose_x = qk_transpose_x_0, transpose_y = qk_transpose_y_0, x = transpose_71, y = transpose_72)[name = string("qk_cast_fp16")];
tensor<fp16, [1, 8, ?, 1500]> var_1287_cast_fp16 = softmax(axis = var_1131, x = qk_cast_fp16)[name = string("op_1287_cast_fp16")];
bool var_1289_transpose_x_0 = const()[name = string("op_1289_transpose_x_0"), val = bool(false)];
bool var_1289_transpose_y_0 = const()[name = string("op_1289_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 8, 1500, 64]> v_cast_fp16 = transpose(perm = var_1283, x = var_1282_cast_fp16)[name = string("transpose_76")];
tensor<fp16, [1, 8, ?, 64]> var_1289_cast_fp16 = matmul(transpose_x = var_1289_transpose_x_0, transpose_y = var_1289_transpose_y_0, x = var_1287_cast_fp16, y = v_cast_fp16)[name = string("op_1289_cast_fp16")];
tensor<int32, [4]> var_1290 = const()[name = string("op_1290"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> concat_133x = const()[name = string("concat_133x"), val = tensor<int32, [3]>([1, -1, 512])];
tensor<fp16, [1, ?, 8, 64]> var_1291_cast_fp16 = transpose(perm = var_1290, x = var_1289_cast_fp16)[name = string("transpose_73")];
tensor<fp16, [1, ?, 512]> x_103_cast_fp16 = reshape(shape = concat_133x, x = var_1291_cast_fp16)[name = string("x_103_cast_fp16")];
tensor<fp16, [512, 512]> var_1295_to_fp16 = const()[name = string("op_1295_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94926592)))];
tensor<fp16, [512]> var_1296_to_fp16 = const()[name = string("op_1296_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95450944)))];
tensor<fp16, [1, ?, 512]> linear_45_cast_fp16 = linear(bias = var_1296_to_fp16, weight = var_1295_to_fp16, x = x_103_cast_fp16)[name = string("linear_45_cast_fp16")];
tensor<fp16, [1, ?, 512]> x_105_cast_fp16 = add(x = x_99_cast_fp16, y = linear_45_cast_fp16)[name = string("x_105_cast_fp16")];
tensor<int32, [1]> var_1303_axes_0 = const()[name = string("op_1303_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> blocks_5_mlp_ln_weight_to_fp16 = const()[name = string("blocks_5_mlp_ln_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95452032)))];
tensor<fp16, [512]> blocks_5_mlp_ln_bias_to_fp16 = const()[name = string("blocks_5_mlp_ln_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95453120)))];
tensor<fp16, [1, ?, 512]> var_1303_cast_fp16 = layer_norm(axes = var_1303_axes_0, beta = blocks_5_mlp_ln_bias_to_fp16, epsilon = var_1137_to_fp16, gamma = blocks_5_mlp_ln_weight_to_fp16, x = x_105_cast_fp16)[name = string("op_1303_cast_fp16")];
tensor<fp16, [2048, 512]> var_1312_to_fp16 = const()[name = string("op_1312_to_fp16"), val = tensor<fp16, [2048, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95454208)))];
tensor<fp16, [2048]> var_1313_to_fp16 = const()[name = string("op_1313_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97551424)))];
tensor<fp16, [1, ?, 2048]> linear_46_cast_fp16 = linear(bias = var_1313_to_fp16, weight = var_1312_to_fp16, x = var_1303_cast_fp16)[name = string("linear_46_cast_fp16")];
string x_109_mode_0 = const()[name = string("x_109_mode_0"), val = string("EXACT")];
tensor<fp16, [1, ?, 2048]> x_109_cast_fp16 = gelu(mode = x_109_mode_0, x = linear_46_cast_fp16)[name = string("x_109_cast_fp16")];
tensor<fp16, [512, 2048]> var_1318_to_fp16 = const()[name = string("op_1318_to_fp16"), val = tensor<fp16, [512, 2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97555584)))];
tensor<fp16, [512]> var_1319_to_fp16 = const()[name = string("op_1319_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99652800)))];
tensor<fp16, [1, ?, 512]> linear_47_cast_fp16 = linear(bias = var_1319_to_fp16, weight = var_1318_to_fp16, x = x_109_cast_fp16)[name = string("linear_47_cast_fp16")];
tensor<fp16, [1, ?, 512]> x_111_cast_fp16 = add(x = x_105_cast_fp16, y = linear_47_cast_fp16)[name = string("x_111_cast_fp16")];
tensor<int32, [1]> var_1332_axes_0 = const()[name = string("op_1332_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> ln_weight_to_fp16 = const()[name = string("ln_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99653888)))];
tensor<fp16, [512]> ln_bias_to_fp16 = const()[name = string("ln_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99654976)))];
fp16 var_1323_to_fp16 = const()[name = string("op_1323_to_fp16"), val = fp16(0x1.5p-17)];
tensor<fp16, [1, ?, 512]> var_1332_cast_fp16 = layer_norm(axes = var_1332_axes_0, beta = ln_bias_to_fp16, epsilon = var_1323_to_fp16, gamma = ln_weight_to_fp16, x = x_111_cast_fp16)[name = string("op_1332_cast_fp16")];
tensor<fp16, [51865]> var_1342_bias_0_to_fp16 = const()[name = string("op_1342_bias_0_to_fp16"), val = tensor<fp16, [51865]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99656064)))];
tensor<fp16, [1, ?, 51865]> logits = linear(bias = var_1342_bias_0_to_fp16, weight = token_embedding_weight_to_fp16, x = var_1332_cast_fp16)[name = string("op_1342_cast_fp16")];
} -> (logits);
}